#TO DO:
# all data files should source from single replication folder set by here(), rather than team-specific directories
# metadata with datafiles should be separate documents
# replication directory should have a ReadMe.txt file that describes each individual file and variables for data file
# replication directory should have copies of all data collection instruments

######################################################################################
######################################################################################
#Replication code for:

#Mark T. Buntaine, Patrick Hunnicutt, and Polycarp Komakech
#The Challenges of Using Citizen Reporting to Improve Public Services: A Field Experiment on Solid Waste Services in Uganda
#Journal of Public Administration Research & Theory

#Prepared by: Patrick Hunnicutt
#Patrick Hunnicutt contact (as of May 2020): hunnicutt@bren.ucsb.edu

#Compiled using R Version 3.6.1 (2019-07-05) (version "Action of the Toes") on x86_64-apple-darwin15.6.0 (64-bit)
######################################################################################
######################################################################################


##### PREAMBLE #####
rm(list=ls())

## (1) Load Packages -----
library(plyr) #Version X.XX
library(dplyr) #Version X.XX
library(readr)
library(stringr)
library(tidyr)
library(gdata) 
library(gtools) 
library(ggplot2)
library(grid)
library(gridExtra)
library(stringr)
library(xtable)
library(dummies)
library(devtools)
library(randomizr)
library(estimatr)
library(rgdal)
library(rgeos)
library(ri)
library(multiwayvcov)
library(lmtest)
library(stats)
library(lfe)
library(stargazer)
library(raster)
library(here)
library(ggpubr)

## (2) Set Working Directory -----
setwd("~/Google Drive File Stream/My Drive/Kampala Solid Waste project/Phase 3/KCCA_SolidWaste_ph3replication")
# rmarkdown::render("kcca_replication.R", "pdf_document")
# setwd(here())
getwd()

## (3) Define Custom Functions -----
rcel <- function(n){
  out <- 10*ceiling(n/10)
  return(out)
}

balance.plot_f <- function(dta, var, treat, title){
  
  if (class(dta[,var])=="factor"){
    dta <- dta[!is.na(dta[,var]),]
    d <- table(dta[,treat], dta[,var])
    m <- max(d)
    n <- rcel(m)
    
    out <-  ggplot(data=dta, aes_string(x = var, fill = treat)) + 
      geom_histogram(data=dta[dta[,treat]==1,], stat="count", col="grey20", alpha=0.75) + 
      geom_histogram(data=dta[dta[,treat]==0,], stat="count", aes(y=..count..*(-1)), col="grey20", alpha=0.75) + 
      scale_y_continuous(limits=c(-n,n), breaks=seq(-n,n,n),labels=abs(seq(-n,n,n))) +
      coord_flip() + 
      # theme(legend.position="none") +
      ggtitle(title) +
      theme(plot.title = element_text(hjust = 0.5, size=12), axis.title.y = element_blank(), axis.title.x = element_blank(), axis.text.y = element_text(size=8))
    return(out)}
}

balance.plot_n <- function(dta, var, treat, title){
  dta <- dta[!is.na(dta[,var]),]
  # d <- density(dta[,var])
  # n <- round_any(max(d$y), 0.05)
  
  out <- ggplot(data=dta, aes_string(x = var, fill = treat)) + 
    geom_histogram(data=dta[dta[,treat]==1,], col="grey20", alpha=0.75, bins=20) + 
    geom_histogram(data=dta[dta[,treat]==0,], aes(y=..count..*(-1)), col="grey20", alpha=0.75, bins=20) + 
    # scale_y_continuous(limits=c(-n,n), breaks=seq(-n,n,n),labels=abs(seq(-n,n,n))) +
    coord_flip() + 
    # theme(legend.position="none") +
    ggtitle(title) +
    theme(plot.title = element_text(hjust = 0.5, size=12), axis.title.y = element_blank(), axis.title.x = element_blank(), axis.text.y = element_text(size=8))
  return(out)
}

balance_plot <- function(dta, var, treat, title){
  if (class(dta[,var])=="factor"){
    dta <- dta[!is.na(dta[,var]),]
    d <- table(dta[,treat], dta[,var])
    m <- max(d)
    n <- rcel(m)
    
    out <-  ggplot(data=dta, aes_string(x = var, fill = treat)) + 
      geom_histogram(data=dta[dta[,treat]==1,], stat="count", col="grey20", alpha=0.75) + 
      geom_histogram(data=dta[dta[,treat]==0,], stat="count", aes(y=..count..*(-1)), col="grey20", alpha=0.75) + 
      scale_y_continuous(limits=c(-n,n), breaks=seq(-n,n,n),labels=abs(seq(-n,n,n))) +
      coord_flip() + 
      # theme(legend.position="none") +
      ggtitle(title) +
      theme(plot.title = element_text(hjust = 0.5, size=12), axis.title.y = element_blank(), axis.title.x = element_blank(), axis.text.y = element_text(size=8))
  } else if (class(dta[,var])=="numeric"|class(dta[,var])=="integer"){
    dta <- dta[!is.na(dta[,var]),]
    # d <- density(dta[,var])
    # n <- round_any(max(d$y), 0.05)
    
    out <- ggplot(data=dta, aes_string(x = var, fill = treat)) + 
      geom_histogram(data=dta[dta[,treat]==1,], col="grey20", alpha=0.75, bins=20) + 
      geom_histogram(data=dta[dta[,treat]==0,], aes(y=..count..*(-1)), col="grey20", alpha=0.75, bins=20) + 
      # scale_y_continuous(limits=c(-n,n), breaks=seq(-n,n,n),labels=abs(seq(-n,n,n))) +
      coord_flip() + 
      # theme(legend.position="none") +
      ggtitle(title) +
      theme(plot.title = element_text(hjust = 0.5, size=12), axis.title.y = element_blank(), axis.title.x = element_blank(), axis.text.y = element_text(size=8))
  }
  return(out)
}

lm.ri <- function(formula, dta, treat.var, sims, clust_var, m,...){ #treatment variable needs to be in quotes
  require(lfe)
  
  ate <- coef(lm(formula, data=dta))[2]
  N <- nobs(lm(formula, data=dta))
  ate.samp.dist <- rep(NA,sims)
  
  for (i in 1:sims){
    dta[,treat.var] <- cluster_ra(clust_var, m)
    ate.samp.dist[i] <- coef(lm(formula, data=dta))[2]
  }
  
  p.two.way <- sum(abs(ate)<abs(ate.samp.dist))/sims
  p.one.way.greater <- sum(ate<ate.samp.dist)/sims
  p.one.way.lesser <- sum(ate>ate.samp.dist)/sims
  se <- sd(ate.samp.dist)
  
  coun <- list("ate" = ate, "ate.samp.dist" = ate.samp.dist, "se"=se, "p.two.way" = p.two.way, "p.one.way.greater" = p.one.way.greater, "p.one.way.lesser" = p.one.way.lesser, "N" = N)
  return(coun)
}

did.ri <- function(formula, dta, treat.var, sims, clust_var, m,...){   require(lfe)
  
  ate <- coef(lm(formula, data=dta))[4]
  N <- nobs(lm(formula, data=dta))
  ate.samp.dist <- rep(NA,sims)
  
  for (i in 1:sims){
    dta[,treat.var] <- cluster_ra(clust_var, m)
    ate.samp.dist[i] <- coef(lm(formula, data=dta))[4]
  }
  
  p.two.way <- sum(abs(ate)<abs(ate.samp.dist))/sims
  p.one.way.greater <- sum(ate<ate.samp.dist)/sims
  p.one.way.lesser <- sum(ate>ate.samp.dist)/sims
  se <- sd(ate.samp.dist)
  
  coun <- list("ate" = ate, "ate.samp.dist" = ate.samp.dist, "se"=se, "p.two.way" = p.two.way, "p.one.way.greater" = p.one.way.greater, "p.one.way.lesser" = p.one.way.lesser, "N" = N)
  return(coun)
}


#note: specify negative treatment effects
lm.ri.power.all3 <- function(formula.size, formula.dummy, formula.rank, 
                             dta, treat.var, outcome.var, binary.var, rank.var,
                             sims, clust_var, m, te,
                             size.bar, dummy.bar, rank.bar, ...){
  require(randomizr)
  
  outcome.store <- dta[,outcome.var]
  binary.store <- dta[,binary.var]
  
  ate.samp.dist.size <- rep(NA,sims)
  ate.samp.dist.dummy <- rep(NA,sims)
  ate.samp.dist.rank <- rep(NA,sims)
  
  for (i in 1:sims){
    dta[,treat.var] <- cluster_ra(clust_var, m)
    
    total.treat <- -(sum(dta[,treat.var]==1)*te)
    
    #Size
    dta[,outcome.var] <- outcome.store
    
    while(total.treat>0){
      clean.it <- sample(row.names(dta[dta[,treat.var]==1 & dta[,outcome.var]>0,]), 1)
      
      if(total.treat>dta[row.names(dta)==clean.it, outcome.var]){
        total.treat <- total.treat - dta[row.names(dta)==clean.it, outcome.var]
        dta[row.names(dta)==clean.it, outcome.var] <- 0
      }
      
      if(total.treat<=dta[row.names(dta)==clean.it, outcome.var]){
        dta[row.names(dta)==clean.it, outcome.var] <- dta[row.names(dta)==clean.it, outcome.var] - total.treat
        total.treat <- 0
      }
    }
    
    ate.samp.dist.size[i] <- coef(lm(formula.size, data=dta))[2]
    
    #Dummy
    #dta[,binary.var] <- binary.store
    dta[,binary.var] <- ifelse(dta[,outcome.var]==0, 0, 1)
    ate.samp.dist.dummy[i] <- coef(lm(formula.dummy, data=dta))[2]
    
    #Rank
    dta[,rank.var] <- rank(dta[,outcome.var], na.last = "keep")
    ate.samp.dist.rank[i] <- coef(lm(formula.rank, data=dta))[2]
  }
  
  power.size <- sum(ate.samp.dist.size<size.bar)/sims
  power.dummy <- sum(ate.samp.dist.dummy<dummy.bar)/sims
  power.rank <- sum(ate.samp.dist.rank<rank.bar)/sims
  
  coun <- list("ate.samp.dist.size" = ate.samp.dist.size, "power.size"=power.size,
               "ate.samp.dist.dummy" = ate.samp.dist.dummy, "power.dummy"=power.dummy,
               "ate.samp.dist.rank" = ate.samp.dist.rank, "power.rank"=power.rank)
  return(coun)
}


lm.cluster<-function(model, cluster)
{
  require(multiwayvcov)
  require(lmtest)
  vcovCL<-cluster.vcov(model, cluster)
  
  coef <- coeftest(model, vcovCL)
  w <- waldtest.default(model, vcov = vcovCL, test = "F")

  return(list(coef, w))
} #This function computes cluster standard errors using an lm() fit


## (4) Read-in Data -----
main_dta <- read.csv("~/Google Drive File Stream/My Drive/Kampala Solid Waste project/Phase 3/KCCA_SolidWaste_ph3replication/kcca_replication_all_dta.csv",
                     stringsAsFactors = FALSE, strip.white = TRUE) 
#main_dta: data on the full sample of audited informal waste piles.

subsetA_dta <- read.csv("~/Google Drive File Stream/My Drive/Kampala Solid Waste project/Phase 3/KCCA_SolidWaste_ph3replication/kcca_replication_subsetA.csv",
                        stringsAsFactors = FALSE, strip.white = TRUE) 
#subsetA_dta: subsetted pile data; excludes piles due to discrepancies in the coordinates of the pile between audits.

subsetB_dta <- read.csv("~/Google Drive File Stream/My Drive/Kampala Solid Waste project/Phase 3/KCCA_SolidWaste_ph3replication/kcca_replication_subsetB.csv",
                        stringsAsFactors = FALSE, strip.white = TRUE) 
#subsetB_dta: subsetted pile data; excludes piles due to discrepancies in the coordinates of the pile between audits & due to implausible 
#size measurements as determined by comparing recorded pile sizes with photos of waste piles. Also contains variables required for 
#analysis of spillover

hte_dta <- read.csv("~/Google Drive File Stream/My Drive/Kampala Solid Waste project/Phase 3/KCCA_SolidWaste_ph3replication/kcca_replication_hte.csv",
                    stringsAsFactors = FALSE, strip.white = TRUE) 
#hte_dta: subsetted pile data (subsetB_dta) used for exploring HTE and sub-group effects.

lc1 <- readOGR(dsn="~/Google Drive File Stream/My Drive/Kampala Solid Waste project/Phase 3/KCCA_SolidWaste_ph3replication/LC1 Shapefiles",
               layer="Kampala_villagesfff") 
#lc1: shapefile containing polygons of zones, parishes, and divisions in Kampala.

rr <- read.csv("~/Google Drive File Stream/My Drive/Kampala Solid Waste project/Phase 3/KCCA_SolidWaste_ph3replication/kcca_replication_reporting.csv",
               stringsAsFactors = FALSE, strip.white = TRUE) 
##rr: data on reporting rates of citizen reporters over the study period.

p1.sample <- read.csv("~/Google Drive File Stream/My Drive/Kampala Solid Waste project/Phase 3/KCCA_SolidWaste_ph3replication/kcca_replication_p1sample.csv",
                      stringsAsFactors = FALSE, strip.white = TRUE)
##p1.sample: assignment data on piles sampled in project's first phase (used for mapping experimental sample).

p2.sample <- read.csv("~/Google Drive File Stream/My Drive/Kampala Solid Waste project/Phase 3/KCCA_SolidWaste_ph3replication/kcca_replication_p2sample.csv",
                      stringsAsFactors = FALSE, strip.white = TRUE)
##p2.sample: assignment data on piles sampled in project's second phase (used for mapping experimental sample).

p3.sample <- read.csv("~/Google Drive File Stream/My Drive/Kampala Solid Waste project/Phase 3/KCCA_SolidWaste_ph3replication/kcca_replication_p3sample.csv",
                      stringsAsFactors = FALSE, strip.white = TRUE)
##p3.sample: assignment data on piles sampled in project's third phase (used for mapping experimental sample).

##  (5) Clean Data ----- 
##Assign Unique ID to Zones in Shapefile
lc1@data$zone.id <- rownames(lc1@data)

lc1@data$zpd_uid <- NA
lc1@data$zpd_uid <- paste(lc1@data$VNAME, "_", lc1@data$Parishes, "_", lc1@data$Division)
lc1@data$zpd_uid <- tolower(lc1@data$zpd_uid)
lc1@data$zpd_uid <- str_replace_all(lc1@data$zpd_uid, " ", "")
lc1@data$zpd_uid <- str_replace_all(lc1@data$zpd_uid, ",", "")
lc1@data$zpd_uid <- str_replace_all(lc1@data$zpd_uid, "-", ".")



##### FIGURES #####
## Figure 1: Reporting Rates -----
# tiff("./Figures/Figure1.tiff", width=5, height=3, units="in", res=300)
# par(mar = c(2.1, 4.1, 1, 1))
# plot(x=1:nrow(rr), y=rr$Usable.Response.Rate, type="l", col="red", lwd=3, xlab="Question Number", ylab="Proportion Reporting", ylim=c(0,0.2), xaxt="n")
# axis(1, at=1:nrow(rr), labels=rr$Question, cex.axis=0.45)
# dev.off()

# png("./Figures/Figure1.png", width=5, height=3, units="in", res=300)
par(mar = c(2.1, 4.1, 1, 1))
plot(x=1:nrow(rr), y=rr$Usable.Response.Rate, type="l", col="red", lwd=3, xlab="Question Number", ylab="Proportion Reporting", ylim=c(0,0.2), xaxt="n")
axis(1, at=1:nrow(rr), labels=rr$Question, cex.axis=0.45)
# dev.off()

## Figure 2: Map of Experimental Sample -----
trt.zones <- unique(main_dta$zone.id[main_dta$treat==1])
trt.zones <- trt.zones[!is.na(trt.zones)] 
ctl.zones <- unique(main_dta$zone.id[main_dta$treat==0])
ctl.zones <- ctl.zones[!is.na(ctl.zones)] 

###Bring in Phase 1/2 zones that have continuing reports, from SolidWaste_Spillover_Phase3.R
#Note, from SolidWaste_Phase1_Analysis: no subjects recruited from Ntinda, Village 7 (reason was never documented and PM/Jacob unsure)
p1.sample <- subset(p1.sample, X!=565)
p2.sample <- subset(p2.sample, InSample==1) #removing replacement zones not used, original zones not usable
prev.zones <- c(p1.sample$X, p2.sample$zone.id.realized) #Note: some zones might not have active reporting, despite reporters recruited (per SolidWaste_Phase2_Analysis.R)

piles <- data.frame(main_dta$m2.pile.lon, main_dta$m2.pile.lat)
piles <- piles[complete.cases(piles),]
names(piles) <- c("lon","lat")

LongLatToUTM<-function(x,y,zone){
  xy <- data.frame(ID = 1:length(x), X = x, Y = y)
  coordinates(xy) <- c("X", "Y")
  proj4string(xy) <- CRS("+proj=longlat +datum=WGS84")  ## for example
  res <- spTransform(xy, CRS(paste("+proj=utm +zone=",zone," ellps=WGS84",sep='')))
  return(as.data.frame(res))
}

cords <- LongLatToUTM(x=piles$lon, y=piles$lat, zone=36)

# tiff("./Figures/Figure2.tiff", width=5, height=5, units="in", res=300)
# par(mar = c(1, 1, 1, 1))
# plot(lc1, col = "lightgrey")
# plot(lc1[row.names(lc1@data) %in% ctl.zones, ], col = "blue", add = TRUE)
# plot(lc1[row.names(lc1@data) %in% trt.zones, ], col = "red", add = TRUE)
# plot(lc1[row.names(lc1@data) %in% prev.zones, ], col = "black", density=40, add = TRUE)
# points(x=cords$X, y=cords$Y, col="lightgreen")
# legend("bottomleft", legend=c("Control","Treated","Previous Sample", "Waste Pile"), cex=1,
#        pch=c(NA,NA,NA,1), fill=c("blue","red","black","white"), density=c(NA,NA,40,NA), border=c(1,1,1,0), col=c(NA,NA,NA,"lightgreen"))
# dev.off()

# png("./Figures/Figure2.png", width=5, height=5, units="in", res=300)
par(mar = c(1, 1, 1, 1))
plot(lc1, col = "lightgrey")
plot(lc1[row.names(lc1@data) %in% ctl.zones, ], col = "blue", add = TRUE)
plot(lc1[row.names(lc1@data) %in% trt.zones, ], col = "red", add = TRUE)
plot(lc1[row.names(lc1@data) %in% prev.zones, ], col = "black", density=40, add = TRUE)
points(x=cords$X, y=cords$Y, col="lightgreen")
legend("bottomleft", legend=c("Control","Treated","Previous Sample", "Waste Pile"), cex=1, 
       pch=c(NA,NA,NA,1), fill=c("blue","red","black","white"), density=c(NA,NA,40,NA), border=c(1,1,1,0), col=c(NA,NA,NA,"lightgreen")) 
# dev.off()


## Figure 7: Dependent Variables, Midline 1 -----
#WASTE PILE DUMMY
table(subsetA_dta$treat, subsetA_dta$m1.waste.pile_d)

a<-data.frame((prop.table(table(subsetA_dta$treat, subsetA_dta$m1.waste.pile_d),1)))
names(a) <- c("treat", "desc", "prop")

a$desc <- ordered(factor(c(rep("Cleaned", 2), rep("Not Cleaned", 2))), levels=c("Cleaned", "Not Cleaned"))
a$desc.n <- as.numeric(a$desc)
a <- a[a$desc=="Cleaned",]

ctrl <- as.numeric(length(which(subsetA_dta$m1.waste.pile_d==0&subsetA_dta$treat==0)))
trt <- as.numeric(length(which(subsetA_dta$m1.waste.pile_d==0&subsetA_dta$treat==1)))
n.ctrl <- as.numeric(length(which(subsetA_dta$treat==0))) #count of units in control stays the same for all t-tests
n.trt <- as.numeric(length(which(subsetA_dta$treat==1))) #count of units in treatment stays the same for all t-tests

ctrl.means <- rep(NA,1000)
trt.means <- rep(NA,1000)
set.seed(201)
for (i in 1:1000){
  ctrl.means[i] <- mean(sample(c(rep(0,ctrl),rep(1,n.ctrl-ctrl)), replace=T))
  trt.means[i] <- mean(sample(c(rep(0,trt),rep(1,n.trt-trt)), replace=T))
} 

a$se <- c(sd(ctrl.means),sd(trt.means))
a$Assignment <- c("Control", "Treatment")

ctrl <- as.numeric(length(which(subsetA_dta$m1.waste.pile_d==0&subsetA_dta$treat==0)))
trt <- as.numeric(length(which(subsetA_dta$m1.waste.pile_d==0&subsetA_dta$treat==1)))
n.ctrl <- as.numeric(length(which(subsetA_dta$treat==0))) #count of units in control stays the same for all t-tests
n.trt <- as.numeric(length(which(subsetA_dta$treat==1))) #count of units in treatment stays the same for all t-tests

prop.test(c(ctrl,trt), c(n.ctrl,n.trt), alternative = "l")

#WASTE PILE DUMMY ADJUSTED
table(subsetA_dta$treat, subsetA_dta$m1.waste.pile_d2)

a.adj <- data.frame((prop.table(table(subsetA_dta$treat, subsetA_dta$m1.waste.pile_d2),1)))
names(a.adj) <- c("treat", "desc", "prop")
a.adj$desc <- ordered(factor(c(rep("Cleaned", 2), rep("Not Cleaned", 2))), levels=c("Cleaned", "Not Cleaned"))
a.adj$desc.n <- as.numeric(a.adj$desc)
a.adj <- a.adj[a.adj$desc=="Cleaned",]


ctrl <- as.numeric(length(which(subsetA_dta$m1.waste.pile_d2==0&subsetA_dta$treat==0)))
trt <- as.numeric(length(which(subsetA_dta$m1.waste.pile_d2==0&subsetA_dta$treat==1)))

ctrl.means <- rep(NA,1000)
trt.means <- rep(NA,1000)
set.seed(201)
for (i in 1:1000){
  ctrl.means[i] <- mean(sample(c(rep(0,ctrl),rep(1,n.ctrl-ctrl)), replace=T))
  trt.means[i] <- mean(sample(c(rep(0,trt),rep(1,n.trt-trt)), replace=T))
} 

a.adj$se <- c(sd(ctrl.means),sd(trt.means))
a.adj$Assignment <- c("Control", "Treatment")

prop.test(c(ctrl,trt), c(n.ctrl,n.trt), alternative="l")

##GENERAL PILE DESCRIPTION
table(subsetA_dta$treat, subsetA_dta$m1.pile.gdesc)
subsetA_dta$m1.pile.gdesc <- ifelse(subsetA_dta$m1.pile.gdesc==""&subsetA_dta$m1.waste.pile_d==1, "Missing", subsetA_dta$m1.pile.gdesc)
table(subsetA_dta$treat, subsetA_dta$m1.pile.gdesc)
b <- data.frame(prop.table(table(subsetA_dta$treat, subsetA_dta$m1.pile.gdesc),1))
names(b) <- c("treat","desc","prop")
b$desc <- as.character(b$desc)
#Removing missing responses from table
b <- b[!(b$desc=="Missing"),]
#Recoding possible answers -> groups
b$desc[b$desc==""] <- "Cleaned"
b$desc[b$desc=="More than 10 pieces of non-organic waste"] <- ">10 pcs.\nwaste"
b$desc[b$desc=="Less than 10 pieces of non-organic waste"] <- "<10 pcs.\nwaste"
b$desc[b$desc=="Large sack(s) or container(s) of rubbish that can easily be transported"] <- "In \nContainers"
b <- b[!is.na(b$desc),]
b$desc <- ordered(factor(b$desc,
                         levels=c("Cleaned","In \nContainers","<10 pcs.\nwaste", ">10 pcs.\nwaste")))
b$desc.n <- as.numeric(b$desc)

con.prop.transport <- rep(NA,1000)
trt.prop.transport <- rep(NA,1000)

con.prop.large <- rep(NA,1000)
trt.prop.large <- rep(NA,1000)

con.prop.small <- rep(NA,1000)
trt.prop.small <- rep(NA,1000)

con.prop.cleaned <- rep(NA,1000)
trt.prop.cleaned <- rep(NA,1000)

set.seed(209)
for (i in 1:1000){
  con.vec <- sample(subsetA_dta$m1.pile.gdesc[subsetA_dta$treat==0], replace=T, size=length(subsetA_dta$m1.pile.gdesc[subsetA_dta$treat==0]))
  trt.vec <- sample(subsetA_dta$m1.pile.gdesc[subsetA_dta$treat==1], replace=T, size=length(subsetA_dta$m1.pile.gdesc[subsetA_dta$treat==1]))
  
  con.vec <- con.vec[!is.na(con.vec)]
  trt.vec <- trt.vec[!is.na(trt.vec)]
  
  con.prop.transport[i] <- length(con.vec[con.vec=="Large sack(s) or container(s) of rubbish that can easily be transported"]) / length(con.vec)
  trt.prop.transport[i] <- length(trt.vec[trt.vec=="Large sack(s) or container(s) of rubbish that can easily be transported"]) / length(trt.vec)
  
  con.prop.large[i] <- length(con.vec[con.vec=="More than 10 pieces of non-organic waste"]) / length(con.vec)
  trt.prop.large[i] <- length(trt.vec[trt.vec=="More than 10 pieces of non-organic waste"]) / length(trt.vec)
  
  con.prop.small[i] <- length(con.vec[con.vec=="Less than 10 pieces of non-organic waste"]) / length(con.vec)
  trt.prop.small[i] <- length(trt.vec[trt.vec=="Less than 10 pieces of non-organic waste"]) / length(trt.vec)
  
  con.prop.cleaned[i] <- length(con.vec[con.vec==""]) / length(con.vec)
  trt.prop.cleaned[i] <- length(trt.vec[trt.vec==""]) / length(trt.vec)
}

b$se[1:2] <- c(sd(con.prop.cleaned),sd(trt.prop.cleaned))
b$se[3:4] <- c(sd(con.prop.transport),sd(trt.prop.transport))
b$se[5:6] <- c(sd(con.prop.large),sd(trt.prop.large))
b$se[7:8] <- c(sd(con.prop.small),sd(trt.prop.small))

b$Assignment <- c("Control", "Treatment")

#T-tests
ctrl.largesacks <- as.numeric(length(which(subsetA_dta$m1.pile.gdesc=="Large sack(s) or container(s) of rubbish that can easily be transported"&subsetA_dta$treat==0)))
trt.largesacks <- as.numeric(length(which(subsetA_dta$m1.pile.gdesc=="Large sack(s) or container(s) of rubbish that can easily be transported"&subsetA_dta$treat==1)))
ctrl.less10pcs <- as.numeric(length(which(subsetA_dta$m1.pile.gdesc=="Less than 10 pieces of non-organic waste"&subsetA_dta$treat==0)))
trt.less10pcs <- as.numeric(length(which(subsetA_dta$m1.pile.gdesc=="Less than 10 pieces of non-organic waste"&subsetA_dta$treat==1)))
ctrl.more10pcs <- as.numeric(length(which(subsetA_dta$m1.pile.gdesc=="More than 10 pieces of non-organic waste"&subsetA_dta$treat==0)))
trt.more10pcs <- as.numeric(length(which(subsetA_dta$m1.pile.gdesc=="More than 10 pieces of non-organic waste"&subsetA_dta$treat==1)))


prop.test(c(ctrl.largesacks,trt.largesacks), c(n.ctrl,n.trt), alternative = "l")
prop.test(c(ctrl.less10pcs,trt.less10pcs), c(n.ctrl,n.trt), alternative = "l")
prop.test(c(ctrl.more10pcs,trt.more10pcs), c(n.ctrl,n.trt), alternative = "g")


##STORAGE
table(subsetA_dta$treat,subsetA_dta$m1.waste.stor)

c <- data.frame(prop.table(table(subsetA_dta$treat,subsetA_dta$m1.waste.stor),1))
names(c) <- c("treat","desc","prop")
c$desc <- as.character(c$desc)
c$desc[c$desc==""] <- "Cleaned"
c$desc[c$desc=="No rubbish is contained in sacks or containers"] <- "None"
c$desc[c$desc=="All of the rubbish is neatly contained with sacks or other containers"] <- "Full \n"
c$desc[c$desc=="Most of the rubbish is organized in sacks or other containers"] <- "Most"
c$desc[c$desc=="Very little rubbish is contained within sacks or containers"] <- "Some"
c <- c[!is.na(c$desc),]
c$desc <- ordered(factor(c$desc,
                         levels=c("Cleaned","Full \n","Most","Some","None")))
c$desc.n <- as.numeric(c$desc)

con.prop.uc <- rep(NA,1000)
trt.prop.uc <- rep(NA,1000)

con.prop.pc <- rep(NA,1000)
trt.prop.pc <- rep(NA,1000)

con.prop.mc <- rep(NA,1000)
trt.prop.mc <- rep(NA,1000)

con.prop.fc <- rep(NA,1000)
trt.prop.fc <- rep(NA,1000)

con.prop.cc <- rep(NA,1000)
trt.prop.cc <- rep(NA,1000)

set.seed(201)
for (i in 1:1000){
  con.vec_c <- sample(subsetA_dta$m1.waste.stor[subsetA_dta$treat==0], replace=T, size=length(subsetA_dta$m1.waste.stor[subsetA_dta$treat==0]))
  trt.vec_c <- sample(subsetA_dta$m1.waste.stor[subsetA_dta$treat==1], replace=T, size=length(subsetA_dta$m1.waste.stor[subsetA_dta$treat==1]))
  
  con.vec_c <- con.vec_c[!is.na(con.vec_c)]
  trt.vec_c <- trt.vec_c[!is.na(trt.vec_c)]
  
  con.prop.uc[i] <- length(con.vec_c[con.vec_c=="No rubbish is contained in sacks or containers"])/length(con.vec_c)
  trt.prop.uc[i] <- length(trt.vec_c[trt.vec_c=="No rubbish is contained in sacks or containers"])/length(trt.vec_c)
  
  con.prop.pc[i] <- length(con.vec_c[con.vec_c=="Very little rubbish is contained within sacks or containers"])/length(con.vec_c)
  trt.prop.pc[i] <- length(trt.vec_c[trt.vec_c=="Very little rubbish is contained within sacks or containers"])/length(trt.vec_c)
  
  con.prop.mc[i] <- length(con.vec_c[con.vec_c=="Most of the rubbish is organized in sacks or other containers"])/length(con.vec_c)
  trt.prop.mc[i] <- length(trt.vec_c[trt.vec_c=="Most of the rubbish is organized in sacks or other containers"])/length(trt.vec_c)
  
  con.prop.fc[i] <- length(con.vec_c[con.vec_c=="All of the rubbish is neatly contained with sacks or other containers"])/length(con.vec_c)
  trt.prop.fc[i] <- length(trt.vec_c[trt.vec_c=="All of the rubbish is neatly contained with sacks or other containers"])/length(trt.vec_c)
  
  con.prop.cc[i] <- length(con.vec_c[con.vec_c==""])/length(con.vec_c)
  trt.prop.cc[i] <- length(trt.vec_c[trt.vec_c==""])/length(trt.vec_c)
  
}

c$se[1:2] <- c(sd(con.prop.cc),sd(trt.prop.cc))
c$se[3:4] <- c(sd(con.prop.fc),sd(trt.prop.fc))
c$se[5:6] <- c(sd(con.prop.mc),sd(trt.prop.mc))
c$se[7:8] <- c(sd(con.prop.uc),sd(trt.prop.uc))
c$se[9:10] <- c(sd(con.prop.pc),sd(trt.prop.pc))
c$Assignment <- c("Control", "Treatment")

#T-tests
ctrl.full <- as.numeric(length(which(subsetA_dta$m1.waste.stor=="All of the rubbish is neatly contained with sacks or other containers"&subsetA_dta$treat==0)))
ctrl.most <- as.numeric(length(which(subsetA_dta$m1.waste.stor=="Most of the rubbish is organized in sacks or other containers"&subsetA_dta$treat==0)))
ctrl.some <- as.numeric(length(which(subsetA_dta$m1.waste.stor=="Very little rubbish is contained within sacks or containers"&subsetA_dta$treat==0)))
ctrl.none <- as.numeric(length(which(subsetA_dta$m1.waste.stor=="No rubbish is contained in sacks or containers"&subsetA_dta$treat==0)))
trt.full <- as.numeric(length(which(subsetA_dta$m1.waste.stor=="All of the rubbish is neatly contained with sacks or other containers"&subsetA_dta$treat==1)))
trt.most <- as.numeric(length(which(subsetA_dta$m1.waste.stor=="Most of the rubbish is organized in sacks or other containers"&subsetA_dta$treat==1)))
trt.some <- as.numeric(length(which(subsetA_dta$m1.waste.stor=="Very little rubbish is contained within sacks or containers"&subsetA_dta$treat==1)))
trt.none <- as.numeric(length(which(subsetA_dta$m1.waste.stor=="No rubbish is contained in sacks or containers"&subsetA_dta$treat==1)))

prop.test(c(ctrl.full,trt.full), c(n.ctrl,n.trt), alternative = "l")
prop.test(c(ctrl.most,trt.most), c(n.ctrl,n.trt), alternative = "l")
prop.test(c(ctrl.some,trt.some), c(n.ctrl,n.trt), alternative = "g")
prop.test(c(ctrl.none,trt.none), c(n.ctrl,n.trt), alternative = "g")

#ORGANIZATION
table(subsetA_dta$treat,subsetA_dta$m1.pile.org)

d <- data.frame(prop.table(table(subsetA_dta$treat,subsetA_dta$m1.pile.org),1))
names(d) <- c("treat","desc","prop")
d$desc <- as.character(d$desc)

#Recoding possible answers -> groups
d$desc[d$desc==""] <- "Cleaned"
d$desc[d$desc=="All of the rubbish is collected into a single pile"] <- "Pile"
d$desc[d$desc=="Most of the rubbish is organized around a single pile but other rubbish is spread nearby"] <- "Dispersed \naround Pile"
d$desc[d$desc=="Rubbish is spread all round no evidence of the rubbish being organized"] <- "Dispersed"
d <- d[!is.na(d$desc),]
d$desc <- ordered(factor(d$desc, levels = c("Cleaned","Pile","Dispersed \naround Pile","Dispersed")))
d$desc.n <- as.numeric(d$desc)

con.prop.wopc <- rep(NA,1000)
trt.prop.wopc <- rep(NA,1000)

con.prop.sp <- rep(NA,1000)
trt.prop.sp <- rep(NA,1000)

con.prop.spd <- rep(NA,1000)
trt.prop.spd <- rep(NA,1000)

con.prop.npd <- rep(NA,1000)
trt.prop.npd <- rep(NA,1000)

set.seed(201)
for (i in 1:1000){
  con.vec_d <- sample(subsetA_dta$m1.pile.org[subsetA_dta$treat==0], replace=T, size=length(subsetA_dta$m1.pile.org[subsetA_dta$treat==0]))
  trt.vec_d <- sample(subsetA_dta$m1.pile.org[subsetA_dta$treat==1], replace=T, size=length(subsetA_dta$m1.pile.org[subsetA_dta$treat==1]))
  
  con.vec_d <- con.vec_d[!is.na(con.vec_d)]
  trt.vec_d <- trt.vec_d[!is.na(trt.vec_d)]
  
  con.prop.wopc[i] <- length(con.vec_d[con.vec_d==""])/length(con.vec_d)
  trt.prop.wopc[i] <- length(trt.vec_d[trt.vec_d==""])/length(trt.vec_d)
  
  con.prop.sp[i] <- length(con.vec_d[con.vec_d=="All of the rubbish is collected into a single pile"])/length(con.vec_d)
  trt.prop.sp[i] <- length(trt.vec_d[trt.vec_d=="All of the rubbish is collected into a single pile"])/length(trt.vec_d)
  
  con.prop.spd[i] <- length(con.vec_d[con.vec_d=="Most of the rubbish is organized around a single pile but other rubbish is spread nearby"])/length(con.vec_d)
  trt.prop.spd[i] <- length(trt.vec_d[trt.vec_d=="Most of the rubbish is organized around a single pile but other rubbish is spread nearby"])/length(trt.vec_d)
  
  con.prop.npd[i] <- length(con.vec_d[con.vec_d=="Rubbish is spread all round no evidence of the rubbish being organized"])/length(con.vec_d)
  trt.prop.npd[i] <- length(trt.vec_d[trt.vec_d=="Rubbish is spread all round no evidence of the rubbish being organized"])/length(trt.vec_d)
}

d$se[1:2] <- c(sd(con.prop.wopc),sd(trt.prop.wopc))
d$se[3:4] <- c(sd(con.prop.sp),sd(trt.prop.sp))
d$se[5:6] <- c(sd(con.prop.spd),sd(trt.prop.spd))
d$se[7:8] <- c(sd(con.prop.npd),sd(trt.prop.npd))
d$Assignment <- c("Control", "Treatment")

#T-tests
ctrl.single <- as.numeric(length(which(subsetA_dta$m1.pile.org=="All of the rubbish is collected into a single pile"&subsetA_dta$treat==0)))
ctrl.partdispersed <- as.numeric(length(which(subsetA_dta$m1.pile.org=="Most of the rubbish is organized around a single pile but other rubbish is spread nearby"&subsetA_dta$treat==0)))
ctrl.dispersed <- as.numeric(length(which(subsetA_dta$m1.pile.org=="Rubbish is spread all round no evidence of the rubbish being organized"&subsetA_dta$treat==0)))
trt.single <- as.numeric(length(which(subsetA_dta$m1.pile.org=="All of the rubbish is collected into a single pile"&subsetA_dta$treat==1)))
trt.partdispersed <- as.numeric(length(which(subsetA_dta$m1.pile.org=="Most of the rubbish is organized around a single pile but other rubbish is spread nearby"&subsetA_dta$treat==1)))
trt.dispersed <- as.numeric(length(which(subsetA_dta$m1.pile.org=="Rubbish is spread all round no evidence of the rubbish being organized"&subsetA_dta$treat==1)))

prop.test(c(ctrl.single,trt.single), c(n.ctrl,n.trt), alternative = "l")
prop.test(c(ctrl.partdispersed,trt.partdispersed), c(n.ctrl,n.trt), alternative = "l")
prop.test(c(ctrl.dispersed,trt.dispersed), c(n.ctrl,n.trt), alternative="g")

#BURNING
table(subsetA_dta$treat,subsetA_dta$m1.evi.burn)
e <- data.frame(prop.table(table(subsetA_dta$treat,subsetA_dta$m1.evi.burn),1))
names(e) <- c("treat","desc","prop")
e$desc <- as.character(e$desc)

con.prop.ec <- rep(NA,1000)
trt.prop.ec <- rep(NA,1000)

con.prop.bl <- rep(NA,1000)
trt.prop.bl <- rep(NA,1000)

con.prop.bm <- rep(NA,1000)
trt.prop.bm <- rep(NA,1000)

con.prop.nb <- rep(NA,1000)
trt.prop.nb <- rep(NA,1000)


#Recoding possible answers -> groups
e$desc[e$desc==""] <- "Cleaned"
e$desc[e$desc=="Less than half of the area of the rubbish pile contains evidence of burning"] <- "<50%"
e$desc[e$desc=="More than half of the area of the rubbish pile contains evidence of burning"] <- ">50%"
e$desc[e$desc=="No evidence of burning"] <- "None"
e <- e[!is.na(e$desc),]
e$desc <- ordered(factor(e$desc, levels = c("Cleaned","None", "<50%", ">50%")))
e$desc.n <- as.numeric(e$desc)

set.seed(201)

for (i in 1:1000){
  con.vec_e <- sample(subsetA_dta$m1.evi.burn[subsetA_dta$treat==0], replace=T, size=length(subsetA_dta$m1.evi.burn[subsetA_dta$treat==0]))
  trt.vec_e <- sample(subsetA_dta$m1.evi.burn[subsetA_dta$treat==1], replace=T, size=length(subsetA_dta$m1.evi.burn[subsetA_dta$treat==1]))
  
  con.vec_e <- con.vec_e[!is.na(con.vec_e)]
  trt.vec_e <- trt.vec_e[!is.na(trt.vec_e)]
  
  con.prop.ec[i] <- length(con.vec_e[con.vec_e==""])/length(con.vec_e)
  trt.prop.ec[i] <- length(trt.vec_e[trt.vec_e==""])/length(trt.vec_e)
  
  con.prop.nb[i] <- length(con.vec_e[con.vec_e=="No evidence of burning"])/length(con.vec_e)
  trt.prop.nb[i] <- length(trt.vec_e[trt.vec_e=="No evidence of burning"])/length(trt.vec_e)
  
  con.prop.bl[i] <- length(con.vec_e[con.vec_e=="Less than half of the area of the rubbish pile contains evidence of burning"])/length(con.vec_e)
  trt.prop.bl[i] <- length(trt.vec_e[trt.vec_e=="Less than half of the area of the rubbish pile contains evidence of burning"])/length(trt.vec_e)
  
  con.prop.bm[i] <- length(con.vec_e[con.vec_e=="More than half of the area of the rubbish pile contains evidence of burning"])/length(con.vec_e)
  trt.prop.bm[i] <- length(trt.vec_e[trt.vec_e=="More than half of the area of the rubbish pile contains evidence of burning"])/length(trt.vec_e)
  
}

e$se[1:2] <- c(sd(con.prop.ec),sd(trt.prop.ec))
e$se[3:4] <- c(sd(con.prop.bl),sd(trt.prop.bl))
e$se[5:6] <- c(sd(con.prop.bm),sd(trt.prop.bm))
e$se[7:8] <- c(sd(con.prop.nb),sd(trt.prop.nb))
e$Assignment <- c("Control", "Treatment")

#T-tests
ctrl.noburn <- as.numeric(length(which(subsetA_dta$m1.evi.burn=="No evidence of burning"&subsetA_dta$treat==0)))
ctrl.someburn <- as.numeric(length(which(subsetA_dta$m1.evi.burn=="Less than half of the area of the rubbish pile contains evidence of burning"&subsetA_dta$treat==0)))
ctrl.mostburn <- as.numeric(length(which(subsetA_dta$m1.evi.burn=="More than half of the area of the rubbish pile contains evidence of burning"&subsetA_dta$treat==0)))
trt.noburn <- as.numeric(length(which(subsetA_dta$m1.evi.burn=="No evidence of burning"&subsetA_dta$treat==1)))
trt.someburn <- as.numeric(length(which(subsetA_dta$m1.evi.burn=="Less than half of the area of the rubbish pile contains evidence of burning"&subsetA_dta$treat==1)))
trt.mostburn <- as.numeric(length(which(subsetA_dta$m1.evi.burn=="More than half of the area of the rubbish pile contains evidence of burning"&subsetA_dta$treat==1)))

prop.test(c(ctrl.noburn, trt.noburn), c(n.ctrl,n.trt), alternative = "l")
prop.test(c(ctrl.someburn, trt.someburn), c(n.ctrl,n.trt), alternative = "g")
prop.test(c(ctrl.mostburn, trt.mostburn), c(n.ctrl,n.trt), alternative = "g")


#Final Plot
a$dv <- "M1: Pile Cleaned"
a.adj$dv <- "M1: Pile Cleaned, Adjusted"
b$dv <- "M1: Pile Contents"
c$dv <- "M1: Waste Storage"
d$dv <- "M1: Waste Organization"
e$dv <- "M1: Area Burnt"
m1_outcomes <- rbind(a,a.adj,b,c,d,e)
m1_outcomes$ci95.low <- m1_outcomes$prop - m1_outcomes$se
m1_outcomes$ci95.high <- m1_outcomes$prop + m1_outcomes$se

m1_outcomes$dv <- factor(ordered(m1_outcomes$dv, levels=c("M1: Pile Cleaned", "M1: Pile Cleaned, Adjusted", "M1: Area Burnt",
                                                          "M1: Waste Storage", "M1: Waste Organization", "M1: Pile Contents")))
m1_outcomes_plot <- ggplot(m1_outcomes) +
  geom_bar(aes(desc, prop, group=Assignment, fill=Assignment), stat="identity", position=position_dodge(width=0.925)) +
  geom_errorbar(aes(desc, ymin=ci95.low, ymax=ci95.high, group=Assignment), position=position_dodge(0.925), width=0.25) +
  scale_x_discrete("Description") +
  scale_y_continuous("Proportion of Piles") +
  facet_wrap(.~dv, scales="free") +
  theme(legend.position = "bottom", 
        text=element_text(size=6),
        strip.text=element_text(size=7))
m1_outcomes_plot

# ggsave("./Figures/Figure7.tiff",
#        m1_outcomes_plot,
#        width=5, height=3.5, units="in")
# ggsave("./Figures/Figure7.png",
#        m1_outcomes_plot,
#        width=5, height=3.5, units="in")

## Figure 8: Descriptive Analyses, Midline 2 -----
table(subsetA_dta$treat, subsetA_dta$m2.waste.pile_d)

a2<-data.frame((prop.table(table(subsetA_dta$treat, subsetA_dta$m2.waste.pile_d),1)))
names(a2) <- c("treat", "desc", "prop")

a2$desc <- ordered(factor(c(rep("Cleaned", 2), rep("Not Cleaned", 2))), levels=c("Cleaned", "Not Cleaned"))
a2$desc.n <- as.numeric(a2$desc)
a2 <- a2[a2$desc=="Cleaned",]

ctrl <- as.numeric(length(which(subsetA_dta$m2.waste.pile_d==0&subsetA_dta$treat==0)))
trt <- as.numeric(length(which(subsetA_dta$m2.waste.pile_d==0&subsetA_dta$treat==1)))
n.ctrl <- as.numeric(length(which(subsetA_dta$treat==0))) #count of units in control stays the same for all t-tests
n.trt <- as.numeric(length(which(subsetA_dta$treat==1))) #count of units in treatment stays the same for all t-tests

ctrl.means <- rep(NA,1000)
trt.means <- rep(NA,1000)
set.seed(201)
for (i in 1:1000){
  ctrl.means[i] <- mean(sample(c(rep(0,ctrl),rep(1,n.ctrl-ctrl)), replace=T))
  trt.means[i] <- mean(sample(c(rep(0,trt),rep(1,n.trt-trt)), replace=T))
} 

a2$se <- c(sd(ctrl.means),sd(trt.means))
a2$Assignment <- c("Control", "Treatment")

ctrl <- as.numeric(length(which(subsetA_dta$m2.waste.pile_d==0&subsetA_dta$treat==0)))
trt <- as.numeric(length(which(subsetA_dta$m2.waste.pile_d==0&subsetA_dta$treat==1)))
prop.test(c(ctrl,trt), c(n.ctrl,n.trt), alternative = "l")


##WASTE PILE DUMMY ADJUSTED
table(subsetA_dta$treat, subsetA_dta$m2.waste.pile_d2)

a2.adj <- data.frame((prop.table(table(subsetA_dta$treat, subsetA_dta$m2.waste.pile_d2),1)))
names(a2.adj) <- c("treat", "desc", "prop")
a2.adj$desc <- ordered(factor(c(rep("Cleaned", 2), rep("Not Cleaned", 2))), levels=c("Cleaned", "Not Cleaned"))
a2.adj$desc.n <- as.numeric(a2.adj$desc)
a2.adj <- a2.adj[a2.adj$desc=="Cleaned",]

ctrl <- as.numeric(length(which(subsetA_dta$m2.waste.pile_d2==0&subsetA_dta$treat==0)))
trt <- as.numeric(length(which(subsetA_dta$m2.waste.pile_d2==0&subsetA_dta$treat==1)))

ctrl.means <- rep(NA,1000)
trt.means <- rep(NA,1000)
set.seed(201)
for (i in 1:1000){
  ctrl.means[i] <- mean(sample(c(rep(0,ctrl),rep(1,n.ctrl-ctrl)), replace=T))
  trt.means[i] <- mean(sample(c(rep(0,trt),rep(1,n.trt-trt)), replace=T))
} 

a2.adj$se <- c(sd(ctrl.means),sd(trt.means))
a2.adj$Assignment <- c("Control", "Treatment")

prop.test(c(ctrl,trt), c(n.ctrl,n.trt), alternative="l")

##GENERAL PILE DESCRIPTION
table(subsetA_dta$treat, subsetA_dta$m2.pile.gdesc)
subsetA_dta$m2.pile.gdesc <- ifelse(subsetA_dta$m2.pile.gdesc==""&subsetA_dta$m2.waste.pile_d==1, "Missing", subsetA_dta$m2.pile.gdesc)
table(subsetA_dta$treat, subsetA_dta$m2.pile.gdesc)
b2 <- data.frame(prop.table(table(subsetA_dta$treat, subsetA_dta$m2.pile.gdesc),1))
names(b2) <- c("treat","desc","prop")
b2$desc <- as.character(b2$desc)
#Removing missing responses from table
b2 <- b2[!(b2$desc=="Missing"),]
#Recoding possible answers -> groups
b2$desc[b2$desc==""] <- "Cleaned"
b2$desc[b2$desc=="More than 10 pieces of non-organic waste"] <- ">10 pcs.\nwaste"
b2$desc[b2$desc=="Less than 10 pieces of non-organic waste"] <- "<10 pcs.\nwaste"
b2$desc[b2$desc=="Large sack(s) or container(s) of rubbish that can easily be transported"] <- "In \nContainers"
b2 <- b2[!is.na(b2$desc),]
b2$desc <- ordered(factor(b2$desc,
                         levels=c("Cleaned","In \nContainers","<10 pcs.\nwaste", ">10 pcs.\nwaste")))
b2$desc.n <- as.numeric(b2$desc)

con.prop.transport <- rep(NA,1000)
trt.prop.transport <- rep(NA,1000)

con.prop.large <- rep(NA,1000)
trt.prop.large <- rep(NA,1000)

con.prop.small <- rep(NA,1000)
trt.prop.small <- rep(NA,1000)

con.prop.cleaned <- rep(NA,1000)
trt.prop.cleaned <- rep(NA,1000)

set.seed(209)
for (i in 1:1000){
  con.vec <- sample(subsetA_dta$m2.pile.gdesc[subsetA_dta$treat==0], replace=T, size=length(subsetA_dta$m2.pile.gdesc[subsetA_dta$treat==0]))
  trt.vec <- sample(subsetA_dta$m2.pile.gdesc[subsetA_dta$treat==1], replace=T, size=length(subsetA_dta$m2.pile.gdesc[subsetA_dta$treat==1]))
  
  con.vec <- con.vec[!is.na(con.vec)]
  trt.vec <- trt.vec[!is.na(trt.vec)]
  
  con.prop.transport[i] <- length(con.vec[con.vec=="Large sack(s) or container(s) of rubbish that can easily be transported"]) / length(con.vec)
  trt.prop.transport[i] <- length(trt.vec[trt.vec=="Large sack(s) or container(s) of rubbish that can easily be transported"]) / length(trt.vec)
  
  con.prop.large[i] <- length(con.vec[con.vec=="More than 10 pieces of non-organic waste"]) / length(con.vec)
  trt.prop.large[i] <- length(trt.vec[trt.vec=="More than 10 pieces of non-organic waste"]) / length(trt.vec)
  
  con.prop.small[i] <- length(con.vec[con.vec=="Less than 10 pieces of non-organic waste"]) / length(con.vec)
  trt.prop.small[i] <- length(trt.vec[trt.vec=="Less than 10 pieces of non-organic waste"]) / length(trt.vec)
  
  con.prop.cleaned[i] <- length(con.vec[con.vec==""]) / length(con.vec)
  trt.prop.cleaned[i] <- length(trt.vec[trt.vec==""]) / length(trt.vec)
}

b2$se[1:2] <- c(sd(con.prop.cleaned),sd(trt.prop.cleaned))
b2$se[3:4] <- c(sd(con.prop.transport),sd(trt.prop.transport))
b2$se[5:6] <- c(sd(con.prop.large),sd(trt.prop.large))
b2$se[7:8] <- c(sd(con.prop.small),sd(trt.prop.small))

b2$Assignment <- c("Control", "Treatment")

#T-tests
ctrl.largesacks <- as.numeric(length(which(subsetA_dta$m2.pile.gdesc=="Large sack(s) or container(s) of rubbish that can easily be transported"&subsetA_dta$treat==0)))
trt.largesacks <- as.numeric(length(which(subsetA_dta$m2.pile.gdesc=="Large sack(s) or container(s) of rubbish that can easily be transported"&subsetA_dta$treat==1)))
ctrl.less10pcs <- as.numeric(length(which(subsetA_dta$m2.pile.gdesc=="Less than 10 pieces of non-organic waste"&subsetA_dta$treat==0)))
trt.less10pcs <- as.numeric(length(which(subsetA_dta$m2.pile.gdesc=="Less than 10 pieces of non-organic waste"&subsetA_dta$treat==1)))
ctrl.more10pcs <- as.numeric(length(which(subsetA_dta$m2.pile.gdesc=="More than 10 pieces of non-organic waste"&subsetA_dta$treat==0)))
trt.more10pcs <- as.numeric(length(which(subsetA_dta$m2.pile.gdesc=="More than 10 pieces of non-organic waste"&subsetA_dta$treat==1)))


prop.test(c(ctrl.largesacks,trt.largesacks), c(n.ctrl,n.trt), alternative = "l")
prop.test(c(ctrl.less10pcs,trt.less10pcs), c(n.ctrl,n.trt), alternative = "l")
prop.test(c(ctrl.more10pcs,trt.more10pcs), c(n.ctrl,n.trt), alternative = "g")


##STORAGE
table(subsetA_dta$treat,subsetA_dta$m2.waste.stor)

c2 <- data.frame(prop.table(table(subsetA_dta$treat,subsetA_dta$m2.waste.stor),1))
names(c2) <- c("treat","desc","prop")
c2$desc <- as.character(c2$desc)
c2$desc[c2$desc==""] <- "Cleaned"
c2$desc[c2$desc=="No rubbish is contained in sacks or containers"] <- "None"
c2$desc[c2$desc=="All of the rubbish is neatly contained with sacks or other containers"] <- "Full \n"
c2$desc[c2$desc=="Most of the rubbish is organized in sacks or other containers"] <- "Most"
c2$desc[c2$desc=="Very little rubbish is contained within sacks or containers"] <- "Some"
c2 <- c2[!is.na(c2$desc),]
c2$desc <- ordered(factor(c2$desc,
                         levels=c("Cleaned","Full \n","Most","Some","None")))
c2$desc.n <- as.numeric(c$desc)

con.prop.uc <- rep(NA,1000)
trt.prop.uc <- rep(NA,1000)

con.prop.pc <- rep(NA,1000)
trt.prop.pc <- rep(NA,1000)

con.prop.mc <- rep(NA,1000)
trt.prop.mc <- rep(NA,1000)

con.prop.fc <- rep(NA,1000)
trt.prop.fc <- rep(NA,1000)

con.prop.cc <- rep(NA,1000)
trt.prop.cc <- rep(NA,1000)

set.seed(201)
for (i in 1:1000){
  con.vec_c <- sample(subsetA_dta$m2.waste.stor[subsetA_dta$treat==0], replace=T, size=length(subsetA_dta$m2.waste.stor[subsetA_dta$treat==0]))
  trt.vec_c <- sample(subsetA_dta$m2.waste.stor[subsetA_dta$treat==1], replace=T, size=length(subsetA_dta$m2.waste.stor[subsetA_dta$treat==1]))
  
  con.vec_c <- con.vec_c[!is.na(con.vec_c)]
  trt.vec_c <- trt.vec_c[!is.na(trt.vec_c)]
  
  con.prop.uc[i] <- length(con.vec_c[con.vec_c=="No rubbish is contained in sacks or containers"])/length(con.vec_c)
  trt.prop.uc[i] <- length(trt.vec_c[trt.vec_c=="No rubbish is contained in sacks or containers"])/length(trt.vec_c)
  
  con.prop.pc[i] <- length(con.vec_c[con.vec_c=="Very little rubbish is contained within sacks or containers"])/length(con.vec_c)
  trt.prop.pc[i] <- length(trt.vec_c[trt.vec_c=="Very little rubbish is contained within sacks or containers"])/length(trt.vec_c)
  
  con.prop.mc[i] <- length(con.vec_c[con.vec_c=="Most of the rubbish is organized in sacks or other containers"])/length(con.vec_c)
  trt.prop.mc[i] <- length(trt.vec_c[trt.vec_c=="Most of the rubbish is organized in sacks or other containers"])/length(trt.vec_c)
  
  con.prop.fc[i] <- length(con.vec_c[con.vec_c=="All of the rubbish is neatly contained with sacks or other containers"])/length(con.vec_c)
  trt.prop.fc[i] <- length(trt.vec_c[trt.vec_c=="All of the rubbish is neatly contained with sacks or other containers"])/length(trt.vec_c)
  
  con.prop.cc[i] <- length(con.vec_c[con.vec_c==""])/length(con.vec_c)
  trt.prop.cc[i] <- length(trt.vec_c[trt.vec_c==""])/length(trt.vec_c)
  
}

c2$se[1:2] <- c(sd(con.prop.cc),sd(trt.prop.cc))
c2$se[3:4] <- c(sd(con.prop.fc),sd(trt.prop.fc))
c2$se[5:6] <- c(sd(con.prop.mc),sd(trt.prop.mc))
c2$se[7:8] <- c(sd(con.prop.uc),sd(trt.prop.uc))
c2$se[9:10] <- c(sd(con.prop.pc),sd(trt.prop.pc))
c2$Assignment <- c("Control", "Treatment")

#T-tests
ctrl.full <- as.numeric(length(which(subsetA_dta$m2.waste.stor=="All of the rubbish is neatly contained with sacks or other containers"&subsetA_dta$treat==0)))
ctrl.most <- as.numeric(length(which(subsetA_dta$m2.waste.stor=="Most of the rubbish is organized in sacks or other containers"&subsetA_dta$treat==0)))
ctrl.some <- as.numeric(length(which(subsetA_dta$m2.waste.stor=="Very little rubbish is contained within sacks or containers"&subsetA_dta$treat==0)))
ctrl.none <- as.numeric(length(which(subsetA_dta$m2.waste.stor=="No rubbish is contained in sacks or containers"&subsetA_dta$treat==0)))
trt.full <- as.numeric(length(which(subsetA_dta$m2.waste.stor=="All of the rubbish is neatly contained with sacks or other containers"&subsetA_dta$treat==1)))
trt.most <- as.numeric(length(which(subsetA_dta$m2.waste.stor=="Most of the rubbish is organized in sacks or other containers"&subsetA_dta$treat==1)))
trt.some <- as.numeric(length(which(subsetA_dta$m2.waste.stor=="Very little rubbish is contained within sacks or containers"&subsetA_dta$treat==1)))
trt.none <- as.numeric(length(which(subsetA_dta$m2.waste.stor=="No rubbish is contained in sacks or containers"&subsetA_dta$treat==1)))

prop.test(c(ctrl.full,trt.full), c(n.ctrl,n.trt), alternative = "l")
prop.test(c(ctrl.most,trt.most), c(n.ctrl,n.trt), alternative = "l")
prop.test(c(ctrl.some,trt.some), c(n.ctrl,n.trt), alternative = "g")
prop.test(c(ctrl.none,trt.none), c(n.ctrl,n.trt), alternative = "g")

#ORGANIZATION
table(subsetA_dta$treat,subsetA_dta$m2.pile.org)

d2 <- data.frame(prop.table(table(subsetA_dta$treat,subsetA_dta$m2.pile.org),1))
names(d2) <- c("treat","desc","prop")
d2$desc <- as.character(d2$desc)

#Recoding possible answers -> groups
d2$desc[d2$desc==""] <- "Cleaned"
d2$desc[d2$desc=="All of the rubbish is collected into a single pile"] <- "Pile"
d2$desc[d2$desc=="Most of the rubbish is organized around a single pile but other rubbish is spread nearby"] <- "Dispersed \naround Pile"
d2$desc[d2$desc=="Rubbish is spread all round no evidence of the rubbish being organized"] <- "Dispersed"
d2 <- d2[!is.na(d2$desc),]
d2$desc <- ordered(factor(d2$desc, levels = c("Cleaned","Pile","Dispersed \naround Pile","Dispersed")))
d2$desc.n <- as.numeric(d2$desc)

con.prop.wopc <- rep(NA,1000)
trt.prop.wopc <- rep(NA,1000)

con.prop.sp <- rep(NA,1000)
trt.prop.sp <- rep(NA,1000)

con.prop.spd <- rep(NA,1000)
trt.prop.spd <- rep(NA,1000)

con.prop.npd <- rep(NA,1000)
trt.prop.npd <- rep(NA,1000)

set.seed(201)
for (i in 1:1000){
  con.vec_d <- sample(subsetA_dta$m2.pile.org[subsetA_dta$treat==0], replace=T, size=length(subsetA_dta$m2.pile.org[subsetA_dta$treat==0]))
  trt.vec_d <- sample(subsetA_dta$m2.pile.org[subsetA_dta$treat==1], replace=T, size=length(subsetA_dta$m2.pile.org[subsetA_dta$treat==1]))
  
  con.vec_d <- con.vec_d[!is.na(con.vec_d)]
  trt.vec_d <- trt.vec_d[!is.na(trt.vec_d)]
  
  con.prop.wopc[i] <- length(con.vec_d[con.vec_d==""])/length(con.vec_d)
  trt.prop.wopc[i] <- length(trt.vec_d[trt.vec_d==""])/length(trt.vec_d)
  
  con.prop.sp[i] <- length(con.vec_d[con.vec_d=="All of the rubbish is collected into a single pile"])/length(con.vec_d)
  trt.prop.sp[i] <- length(trt.vec_d[trt.vec_d=="All of the rubbish is collected into a single pile"])/length(trt.vec_d)
  
  con.prop.spd[i] <- length(con.vec_d[con.vec_d=="Most of the rubbish is organized around a single pile but other rubbish is spread nearby"])/length(con.vec_d)
  trt.prop.spd[i] <- length(trt.vec_d[trt.vec_d=="Most of the rubbish is organized around a single pile but other rubbish is spread nearby"])/length(trt.vec_d)
  
  con.prop.npd[i] <- length(con.vec_d[con.vec_d=="Rubbish is spread all round no evidence of the rubbish being organized"])/length(con.vec_d)
  trt.prop.npd[i] <- length(trt.vec_d[trt.vec_d=="Rubbish is spread all round no evidence of the rubbish being organized"])/length(trt.vec_d)
}

d2$se[1:2] <- c(sd(con.prop.wopc),sd(trt.prop.wopc))
d2$se[3:4] <- c(sd(con.prop.sp),sd(trt.prop.sp))
d2$se[5:6] <- c(sd(con.prop.spd),sd(trt.prop.spd))
d2$se[7:8] <- c(sd(con.prop.npd),sd(trt.prop.npd))
d2$Assignment <- c("Control", "Treatment")

#T-tests
ctrl.single <- as.numeric(length(which(subsetA_dta$m2.pile.org=="All of the rubbish is collected into a single pile"&subsetA_dta$treat==0)))
ctrl.partdispersed <- as.numeric(length(which(subsetA_dta$m2.pile.org=="Most of the rubbish is organized around a single pile but other rubbish is spread nearby"&subsetA_dta$treat==0)))
ctrl.dispersed <- as.numeric(length(which(subsetA_dta$m2.pile.org=="Rubbish is spread all round no evidence of the rubbish being organized"&subsetA_dta$treat==0)))
trt.single <- as.numeric(length(which(subsetA_dta$m2.pile.org=="All of the rubbish is collected into a single pile"&subsetA_dta$treat==1)))
trt.partdispersed <- as.numeric(length(which(subsetA_dta$m2.pile.org=="Most of the rubbish is organized around a single pile but other rubbish is spread nearby"&subsetA_dta$treat==1)))
trt.dispersed <- as.numeric(length(which(subsetA_dta$m2.pile.org=="Rubbish is spread all round no evidence of the rubbish being organized"&subsetA_dta$treat==1)))

prop.test(c(ctrl.single,trt.single), c(n.ctrl,n.trt), alternative = "l")#p=0.315; single pile
prop.test(c(ctrl.partdispersed,trt.partdispersed), c(n.ctrl,n.trt), alternative = "l")#p=0.223; most in single pile
prop.test(c(ctrl.dispersed,trt.dispersed), c(n.ctrl,n.trt), alternative="g")#p=0.116; no pile

#BURNING
table(subsetA_dta$treat,subsetA_dta$m2.evi.burn)
e2 <- data.frame(prop.table(table(subsetA_dta$treat,subsetA_dta$m2.evi.burn),1))
names(e2) <- c("treat","desc","prop")
e2$desc <- as.character(e2$desc)

con.prop.ec <- rep(NA,1000)
trt.prop.ec <- rep(NA,1000)

con.prop.bl <- rep(NA,1000)
trt.prop.bl <- rep(NA,1000)

con.prop.bm <- rep(NA,1000)
trt.prop.bm <- rep(NA,1000)

con.prop.nb <- rep(NA,1000)
trt.prop.nb <- rep(NA,1000)


#Recoding possible answers -> groups
e2$desc[e2$desc==""] <- "Cleaned"
e2$desc[e2$desc=="Less than half of the area of the rubbish pile contains evidence of burning"] <- "<50%"
e2$desc[e2$desc=="More than half of the area of the rubbish pile contains evidence of burning"] <- ">50%"
e2$desc[e2$desc=="No evidence of burning"] <- "None"
e2 <- e2[!is.na(e2$desc),]
e2$desc <- ordered(factor(e2$desc, levels = c("Cleaned","None", "<50%", ">50%")))
e2$desc.n <- as.numeric(e2$desc)

set.seed(201)

for (i in 1:1000){
  con.vec_e <- sample(subsetA_dta$m2.evi.burn[subsetA_dta$treat==0], replace=T, size=length(subsetA_dta$m2.evi.burn[subsetA_dta$treat==0]))
  trt.vec_e <- sample(subsetA_dta$m2.evi.burn[subsetA_dta$treat==1], replace=T, size=length(subsetA_dta$m2.evi.burn[subsetA_dta$treat==1]))
  
  con.vec_e <- con.vec_e[!is.na(con.vec_e)]
  trt.vec_e <- trt.vec_e[!is.na(trt.vec_e)]
  
  con.prop.ec[i] <- length(con.vec_e[con.vec_e==""])/length(con.vec_e)
  trt.prop.ec[i] <- length(trt.vec_e[trt.vec_e==""])/length(trt.vec_e)
  
  con.prop.nb[i] <- length(con.vec_e[con.vec_e=="No evidence of burning"])/length(con.vec_e)
  trt.prop.nb[i] <- length(trt.vec_e[trt.vec_e=="No evidence of burning"])/length(trt.vec_e)
  
  con.prop.bl[i] <- length(con.vec_e[con.vec_e=="Less than half of the area of the rubbish pile contains evidence of burning"])/length(con.vec_e)
  trt.prop.bl[i] <- length(trt.vec_e[trt.vec_e=="Less than half of the area of the rubbish pile contains evidence of burning"])/length(trt.vec_e)
  
  con.prop.bm[i] <- length(con.vec_e[con.vec_e=="More than half of the area of the rubbish pile contains evidence of burning"])/length(con.vec_e)
  trt.prop.bm[i] <- length(trt.vec_e[trt.vec_e=="More than half of the area of the rubbish pile contains evidence of burning"])/length(trt.vec_e)
  
}

e2$se[1:2] <- c(sd(con.prop.ec),sd(trt.prop.ec))
e2$se[3:4] <- c(sd(con.prop.bl),sd(trt.prop.bl))
e2$se[5:6] <- c(sd(con.prop.bm),sd(trt.prop.bm))
e2$se[7:8] <- c(sd(con.prop.nb),sd(trt.prop.nb))
e2$Assignment <- c("Control", "Treatment")

#T-tests
ctrl.noburn <- as.numeric(length(which(subsetA_dta$m2.evi.burn=="No evidence of burning"&subsetA_dta$treat==0)))
ctrl.someburn <- as.numeric(length(which(subsetA_dta$m2.evi.burn=="Less than half of the area of the rubbish pile contains evidence of burning"&subsetA_dta$treat==0)))
ctrl.mostburn <- as.numeric(length(which(subsetA_dta$m2.evi.burn=="More than half of the area of the rubbish pile contains evidence of burning"&subsetA_dta$treat==0)))
trt.noburn <- as.numeric(length(which(subsetA_dta$m2.evi.burn=="No evidence of burning"&subsetA_dta$treat==1)))
trt.someburn <- as.numeric(length(which(subsetA_dta$m2.evi.burn=="Less than half of the area of the rubbish pile contains evidence of burning"&subsetA_dta$treat==1)))
trt.mostburn <- as.numeric(length(which(subsetA_dta$m2.evi.burn=="More than half of the area of the rubbish pile contains evidence of burning"&subsetA_dta$treat==1)))

prop.test(c(ctrl.noburn, trt.noburn), c(n.ctrl,n.trt), alternative = "l")#p=0.037; no evidence of burning
prop.test(c(ctrl.someburn, trt.someburn), c(n.ctrl,n.trt), alternative = "g")#p=0.171; some evidence of burning
prop.test(c(ctrl.mostburn, trt.mostburn), c(n.ctrl,n.trt), alternative = "g")#p=0.071; significant evidence of burning


#Final Plot
a2$dv <- "M2: Pile Cleaned"
a2.adj$dv <- "M2: Pile Cleaned, Adjusted"
b2$dv <- "M2: Pile Contents"
c2$dv <- "M2: Waste Storage"
d2$dv <- "M2: Waste Organization"
e2$dv <- "M2: Area Burnt"
m2_outcomes <- rbind(a2,a2.adj,b2,c2,d2,e2)
m2_outcomes$ci95.low <- m2_outcomes$prop - m2_outcomes$se
m2_outcomes$ci95.high <- m2_outcomes$prop + m2_outcomes$se

m2_outcomes$dv <- factor(ordered(m2_outcomes$dv, levels=c("M2: Pile Cleaned", "M2: Pile Cleaned, Adjusted", "M2: Area Burnt",
                                                          "M2: Waste Storage", "M2: Waste Organization", "M2: Pile Contents")))
m2_outcomes_plot <- ggplot(m2_outcomes) +
  geom_bar(aes(desc, prop, group=Assignment, fill=Assignment), stat="identity", position=position_dodge(width=0.925)) +
  geom_errorbar(aes(desc, ymin=ci95.low, ymax=ci95.high, group=Assignment), position=position_dodge(0.925), width=0.25) +
  scale_x_discrete("Description") +
  scale_y_continuous("Proportion of Piles") +
  facet_wrap(.~dv, scales="free") +
  theme(legend.position = "bottom",
        text=element_text(size=6),
        strip.text=element_text(size=7))
m2_outcomes_plot

# ggsave("./Figures/Figure8.tiff",
#        m2_outcomes_plot,
#        width=5, height=3.5, units="in")
# ggsave("./Figures/Figure8.png",
#        m2_outcomes_plot,
#        width=5, height=3.5, units="in")

## Figure 9: Reporting Consistency -----
plot.df <- subset(main_dta, select=c(zone.id,bad.overall,prop.deviant_overall))
plot.df <- plot.df[!duplicated(plot.df),]

main_dta$bad.overall

zid <- plot.df$zone.id

lc1@data$prop.bad.overall <- NA
for(i in 1:length(zid)) {
  lc1@data$prop.deviant_overall[lc1@data$zone.id==zid[i]] <- unique(plot.df$prop.deviant_overall[plot.df$zone.id==zid[i]])
}

summary(lc1@data$prop.deviant_overall)
summary(plot.df$prop.deviant_overall)

lc1.f <- fortify(lc1, region="zone.id") 
lc1.df <- left_join(lc1.f, lc1@data, by=c("id"="zone.id"))

bad.service <- as.character(plot.df$zone.id[plot.df$bad.overall==1])
bad.service <- bad.service[!is.na(bad.service)]

#Basemap of Kampala
basemap <- ggplot() +
  geom_polygon(data=lc1, aes(long,lat,group=group), fill="white") +
  geom_path(data=lc1, aes(long,lat,group=group), color="grey65", size=0.15) +
  theme_void() +
  coord_equal()

#Final Figure
bad.overall_map <- basemap +
  geom_polygon(data=lc1.df[is.na(lc1.df$prop.deviant_overall)==FALSE,], aes(long,lat,group=group,fill=prop.deviant_overall)) + 
  geom_path(data=lc1.df[c(lc1.df$id %in% bad.service & is.na(lc1.df$prop.deviant_overall)==FALSE),], aes(long,lat,group=group),col="red1", size=0.75) +
  theme_void() + 
  coord_equal() +
  scale_fill_gradient("Proportion of\nInconsistent\nResponses", high="black", low="light blue") + 
  theme(legend.title = element_text(size=12), legend.text = element_text(size=12), legend.position = "right")
bad.overall_map

# tiff("./Figures/Figure9.tiff", width=5, height=5, units="in", res=300)
# bad.overall_map
# dev.off()
# png("./Figures/Figure9.png", width=5, height=5, units="in", res=300)
# bad.overall_map
# dev.off()

## Figure J1: Balance Table -----
##Making Pile Size Declies for better visualization
main_dta$b.pile.area_dec <- NA
main_dta$b.pile.area_dec <- ntile(main_dta$b.pile.area_m, 10)

table(main_dta$b.pile.area_dec, useNA="always")

pre.ph3 <- subset(main_dta, select=c(unique.id, zone.id, treat, div,
                                         p1.p2.monitoring, b.type.site, b.pile.area_dec,
                                         lights.mean, area.km2, road.density,ls.pop_2016))
table(pre.ph3$b.type.site)
##Change Variable Names for Plotting
var.names <- c(names(pre.ph3)[1:3], "Division",
               "Previous Phase Reporting", "Pile Type", "Baseline Pile Area, Verified (Decile)",
               "Zone-Level NTL (Mean)", "Zone-Level Area (Mean)", "Zone-Level Road Density (Mean)", "Zone-Level Population (2016)")

let <- str_replace_all(paste(LETTERS[seq(1,9)],".")," ", "")

var.names[4:12] <- paste(let, var.names[4:12])
var.names

##Changing Labels w/in Variables
c.cols <- 4:7
pre.ph3[,c.cols] = apply(pre.ph3[,c.cols], 2, function(x) (as.character(x)))
n.cols <- 8:ncol(pre.ph3)
pre.ph3[,n.cols] = apply(pre.ph3[,n.cols], 2, function(x) (as.numeric(x)))

##Changing labels within variables
table(pre.ph3$p1.p2.monitoring, useNA="always")
pre.ph3$p1.p2.monitoring <- ifelse(pre.ph3$p1.p2.monitoring=="1","yes",pre.ph3$p1.p2.monitoring)
pre.ph3$p1.p2.monitoring <- ifelse(pre.ph3$p1.p2.monitoring=="0","no",pre.ph3$p1.p2.monitoring)
table(pre.ph3$p1.p2.monitoring, useNA="always")

table(pre.ph3$b.type.site, useNA="always")
pre.ph3$b.type.site <- ifelse(pre.ph3$b.type.site=="Pile within household for burning","Household pile",pre.ph3$b.type.site)
pre.ph3$b.type.site <- ifelse(pre.ph3$b.type.site=="Pit within household for burrying","Household pile",pre.ph3$b.type.site)
pre.ph3$b.type.site <- ifelse(pre.ph3$b.type.site=="Household sack or bin for collection","Household sack or bin",pre.ph3$b.type.site)
pre.ph3$b.type.site <- ifelse(pre.ph3$b.type.site=="Littering in public place","Littering",pre.ph3$b.type.site)
pre.ph3$b.type.site <- ifelse(pre.ph3$b.type.site=="Official dumping site or continer","Official dumping site",pre.ph3$b.type.site)
pre.ph3$b.type.site <- ifelse(pre.ph3$b.type.site=="Unofficial dumping site (used by many households)","Unofficial dumping site",pre.ph3$b.type.site)
pre.ph3$b.type.site <- ifelse(pre.ph3$b.type.site=="Small pile outside household","Small pile",pre.ph3$b.type.site)
pre.ph3$b.type.site <- ifelse(pre.ph3$b.type.site=="Pit latrine","Other",pre.ph3$b.type.site)
pre.ph3$b.type.site <- ifelse(pre.ph3$b.type.site=="","Missing",pre.ph3$b.type.site)
table(pre.ph3$b.type.site, useNA="always")

pre.ph3$b.pile.area_dec <- str_replace_all(pre.ph3$b.pile.area_dec, " ", "")

##Creating factors with appropriate levels
table(pre.ph3$treat, useNA = "always")
pre.ph3$treat <- factor(pre.ph3$treat, levels=c("0", "1"))
table(pre.ph3$treat, useNA="always")

table(pre.ph3$div, useNA="always")
pre.ph3$div <- factor(pre.ph3$div, levels=c("central","kawempe","makindye","rubaga","nakawa"))
table(pre.ph3$div, useNA="always")

table(pre.ph3$p1.p2.monitoring, useNA="always")
pre.ph3$p1.p2.monitoring <- factor(pre.ph3$p1.p2.monitoring)
table(pre.ph3$p1.p2.monitoring, useNA="always")

table(pre.ph3$b.type.site, useNA="always")
pre.ph3$b.type.site <- ifelse(is.na(pre.ph3$b.type.site),"Missing",pre.ph3$b.type.site)
pre.ph3$b.type.site <- factor(pre.ph3$b.type.site, 
                              levels=c("Missing","Other","Household sack or bin","Household pile","Small pile",
                                       "Littering","Official dumping site","Unofficial dumping site"))
table(pre.ph3$b.type.site, useNA="always")

table(pre.ph3$b.pile.area_dec, useNA="always")
pre.ph3$b.pile.area_dec <- factor(pre.ph3$b.pile.area_dec, levels=c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10"))
table(pre.ph3$b.pile.area_dec, useNA="always")

table(pre.ph3$treat,pre.ph3$div)

plot.list <- list()
for(i in 4:7){
  p <- balance.plot_f(dta=pre.ph3, var=names(pre.ph3)[i], treat="treat", title=var.names[i])
  plot.list[[i]] <- p + 
    scale_fill_discrete("Assignment", labels = c("Control", "Treatment")) + 
    theme(text=element_text(size=6),
          plot.title = element_text(size=8))
}

for(i in 8:11){
  p <- balance.plot_n(dta=pre.ph3, var=names(pre.ph3)[i], treat="treat", title=var.names[i])
  plot.list[[i]] <-  p + 
    scale_fill_discrete("Assignment", labels = c("Control", "Treatment")) + 
    theme(text=element_text(size=6),
          plot.title = element_text(size=8))
}

plot.list <- plot.list[4:11]
balance_out <- ggarrange(plot.list[[1]], plot.list[[2]], plot.list[[3]], plot.list[[4]], plot.list[[5]],
                         plot.list[[6]], plot.list[[7]], plot.list[[8]], ncol=2, nrow=4, common.legend = T, legend="bottom")
balance_out

# ggsave("./SI Figures/FigureJ1.tiff",
#        balance_out,
#        width=5, height=5, units="in")
# ggsave("./SI Figures/FigureJ1.png",
#        balance_out,
#        width=5, height=5, units="in")


##### TABLES ######
## Table 1 & J1: RI Results, Primary Outcomes ----
ri.cph3.analysis <- subsetB_dta

#Midline 1 Waste Accumulation
ri.m1.size <- lm.ri(formula = m1.pile.area_m~treat+p1.p2.monitoring+b.pile.area_m+lights.mean+road.density+div+area.km2+ls.pop_2016,
                    dta = ri.cph3.analysis,
                    treat.var = "treat",
                    clust_var = ri.cph3.analysis$zone.id,
                    m=length(unique(ri.cph3.analysis$zone.id))/2,
                    sims=10000)#This runs with NAs removed from the treat and cluster variables

ri.m1.size_new <- lm.ri(formula = m1.size_final~treat+p1.p2.monitoring+b.pile.area_m+lights.mean+road.density+div+area.km2+ls.pop_2016,
                        dta = ri.cph3.analysis,
                        treat.var = "treat",
                        clust_var = ri.cph3.analysis$zone.id,
                        m=length(unique(ri.cph3.analysis$zone.id))/2,
                        sims=10000)#This runs with NAs removed from the treat and cluster variables


ri.m2.size <- lm.ri(formula = m2.pile.area_m~treat+p1.p2.monitoring+b.pile.area_m+lights.mean+road.density+div+area.km2+ls.pop_2016,
                    dta = ri.cph3.analysis,
                    treat.var = "treat",
                    clust_var = ri.cph3.analysis$zone.id,
                    m=length(unique(ri.cph3.analysis$zone.id))/2,
                    sims=10000)

ri.m2.size_new <- lm.ri(formula = m2.size_final~treat+p1.p2.monitoring+b.pile.area_m+lights.mean+road.density+div+area.km2+ls.pop_2016,
                        dta = ri.cph3.analysis,
                        treat.var = "treat",
                        clust_var = ri.cph3.analysis$zone.id,
                        m=length(unique(ri.cph3.analysis$zone.id))/2,
                        sims=10000)


#Rank Test

ri.rank_m1 <- lm.ri(formula=rank.m1~treat+p1.p2.monitoring+rank.b+lights.mean+road.density+area.km2+div+ls.pop_2016+rank.b,
                    dta=ri.cph3.analysis,
                    treat.var="treat",
                    clust_var = ri.cph3.analysis$zone.id, 
                    m=length(unique(ri.cph3.analysis$zone.id))/2,
                    sims=10000)

ri.rank_m2 <- lm.ri(formula=rank.m2~treat+p1.p2.monitoring+rank.b+lights.mean+road.density+area.km2+div+ls.pop_2016+rank.b,
                    dta=ri.cph3.analysis,
                    treat.var="treat",
                    clust_var = ri.cph3.analysis$zone.id, 
                    m=length(unique(ri.cph3.analysis$zone.id))/2,
                    sims=10000)

ri.rank_m1_new <- lm.ri(formula=rank.m1_new~treat+p1.p2.monitoring+rank.b_new+lights.mean+road.density+area.km2+div+ls.pop_2016+rank.b,
                        dta=ri.cph3.analysis,
                        treat.var="treat",
                        clust_var = ri.cph3.analysis$zone.id, 
                        m=length(unique(ri.cph3.analysis$zone.id))/2,
                        sims=10000)

ri.rank_m2_new <- lm.ri(formula=rank.m2_new~treat+p1.p2.monitoring+rank.b_new+lights.mean+road.density+area.km2+div+ls.pop_2016+rank.b,
                        dta=ri.cph3.analysis,
                        treat.var="treat",
                        clust_var = ri.cph3.analysis$zone.id, 
                        m=length(unique(ri.cph3.analysis$zone.id))/2,
                        sims=10000)

ri.wpd1 <- lm.ri(formula = m1.waste.pile_d~treat+p1.p2.monitoring+lights.mean+road.density+div+area.km2+ls.pop_2016,
                 dta = ri.cph3.analysis,
                 treat.var = "treat",
                 clust_var = ri.cph3.analysis$zone.id,
                 m=length(unique(ri.cph3.analysis$zone.id))/2,
                 sims=10000)

ri.cph3.analysis$m1.waste.pile_d2 <- ifelse(ri.cph3.analysis$m1.waste.pile_d2=="Yes", 1, ri.cph3.analysis$m1.waste.pile_d2)
ri.cph3.analysis$m1.waste.pile_d2 <- ifelse(ri.cph3.analysis$m1.waste.pile_d2=="No", 0, ri.cph3.analysis$m1.waste.pile_d2)
ri.wpd1_adj <- lm.ri(formula = m1.waste.pile_d2~treat+p1.p2.monitoring+lights.mean+road.density+div+area.km2+ls.pop_2016,
                     dta = ri.cph3.analysis,
                     treat.var = "treat",
                     clust_var = ri.cph3.analysis$zone.id,
                     m=length(unique(ri.cph3.analysis$zone.id))/2,
                     sims=10000)

ri.wpd2 <- lm.ri(formula = m2.waste.pile_d~treat+p1.p2.monitoring+lights.mean+road.density+div+area.km2+ls.pop_2016,
                 dta = ri.cph3.analysis,
                 treat.var = "treat",
                 clust_var = ri.cph3.analysis$zone.id,
                 m=length(unique(ri.cph3.analysis$zone.id))/2,
                 sims=10000)

ri.cph3.analysis$m2.waste.pile_d2 <- ifelse(ri.cph3.analysis$m2.waste.pile_d2=="Yes", 1, ri.cph3.analysis$m2.waste.pile_d2)
ri.cph3.analysis$m2.waste.pile_d2 <- ifelse(ri.cph3.analysis$m2.waste.pile_d2=="No", 0, ri.cph3.analysis$m2.waste.pile_d2)
ri.wpd2_adj <- lm.ri(formula = m2.waste.pile_d2~treat+p1.p2.monitoring+lights.mean+road.density+div+area.km2+ls.pop_2016,
                     dta = ri.cph3.analysis,
                     treat.var = "treat",
                     clust_var = ri.cph3.analysis$zone.id,
                     m=length(unique(ri.cph3.analysis$zone.id))/2,
                     sims=10000)



reg.adj_ri.raw <- data.frame(matrix(nrow=5, ncol=6))
rownames(reg.adj_ri.raw) <- c("Audit","Treatment Effect","Standard Error", "p-value", "N")
colnames(reg.adj_ri.raw) <- c("M1.1", "M2.1", "M1.2", "M2.2", "M1.3", "M2.3")

reg.adj_ri.raw$M1.1[1] <- "M1"
reg.adj_ri.raw$M2.1[1] <- "M2"
reg.adj_ri.raw$M1.2[1] <- "M1"
reg.adj_ri.raw$M2.2[1] <- "M2"
reg.adj_ri.raw$M1.3[1] <- "M1"
reg.adj_ri.raw$M2.3[1] <- "M2"


reg.adj_ri.raw$M1.1[2] <- ri.m1.size$ate
reg.adj_ri.raw$M1.1[3] <- ri.m1.size$se
reg.adj_ri.raw$M1.1[4] <- ri.m1.size$p.one.way.lesser
reg.adj_ri.raw$M1.1[5] <- ri.m1.size$N

reg.adj_ri.raw$M2.1[2] <- ri.m2.size$ate
reg.adj_ri.raw$M2.1[3] <- ri.m2.size$se
reg.adj_ri.raw$M2.1[4] <- ri.m2.size$p.one.way.lesser
reg.adj_ri.raw$M2.1[5] <- ri.m2.size$N

reg.adj_ri.raw$M1.2[2] <- ri.wpd1$ate
reg.adj_ri.raw$M1.2[3] <- ri.wpd1$se
reg.adj_ri.raw$M1.2[4] <- ri.wpd1$p.one.way.lesser
reg.adj_ri.raw$M1.2[5] <- ri.wpd1$N

reg.adj_ri.raw$M2.2[2] <- ri.wpd2$ate
reg.adj_ri.raw$M2.2[3] <- ri.wpd2$se
reg.adj_ri.raw$M2.2[4] <- ri.wpd2$p.one.way.lesser
reg.adj_ri.raw$M2.2[5] <- ri.wpd2$N

reg.adj_ri.raw$M1.3[2] <- ri.rank_m1$ate
reg.adj_ri.raw$M1.3[3] <- ri.rank_m1$se
reg.adj_ri.raw$M1.3[4] <- ri.rank_m1$p.one.way.lesser
reg.adj_ri.raw$M1.3[5] <- ri.rank_m1$N

reg.adj_ri.raw$M2.3[2] <- ri.rank_m2$ate
reg.adj_ri.raw$M2.3[3] <- ri.rank_m2$se
reg.adj_ri.raw$M2.3[4] <- ri.rank_m2$p.one.way.lesser
reg.adj_ri.raw$M2.3[5] <- ri.rank_m2$N

reg.adj_ri.raw[2,] <- round(as.numeric(reg.adj_ri.raw[2,]), 3)
reg.adj_ri.raw[3,] <- round(as.numeric(reg.adj_ri.raw[3,]), 3)
reg.adj_ri.raw[4,] <- round(as.numeric(reg.adj_ri.raw[4,]), 3)
reg.adj_ri.raw[5,] <- round(as.numeric(reg.adj_ri.raw[5,]), 3)


colnames(reg.adj_ri.raw) <- c("Pile Size", "Pile Size", 
                              "Pile Cleaned", "Pile Cleaned",
                              "Pile Rank", "Pile Rank")

reg.adj_ri.cln <- data.frame(matrix(nrow=5, ncol=6))
rownames(reg.adj_ri.cln) <- c("Audit", "Treatment Effect","Standard Error", "p-value", "N")
colnames(reg.adj_ri.cln) <- c("M1.1", "M2.1", "M1.2", "M2.2", "M1.3", "M2.3")

reg.adj_ri.cln$M1.1[1] <- "M1"
reg.adj_ri.cln$M1.2[1] <- "M1"
reg.adj_ri.cln$M1.3[1] <- "M1"
reg.adj_ri.cln$M2.1[1] <- "M2"
reg.adj_ri.cln$M2.2[1] <- "M2"
reg.adj_ri.cln$M2.3[1] <- "M2"

reg.adj_ri.cln$M1.1[2] <- ri.m1.size_new$ate
reg.adj_ri.cln$M1.1[3] <- ri.m1.size_new$se
reg.adj_ri.cln$M1.1[4] <- ri.m1.size_new$p.one.way.lesser
reg.adj_ri.cln$M1.1[5] <- ri.m1.size_new$N

reg.adj_ri.cln$M2.1[2] <- ri.m2.size_new$ate
reg.adj_ri.cln$M2.1[3] <- ri.m2.size_new$se
reg.adj_ri.cln$M2.1[4] <- ri.m2.size_new$p.one.way.lesser
reg.adj_ri.cln$M2.1[5] <- ri.m2.size_new$N

reg.adj_ri.cln$M1.2[2] <- ri.wpd1_adj$ate
reg.adj_ri.cln$M1.2[3] <- ri.wpd1_adj$se
reg.adj_ri.cln$M1.2[4] <- ri.wpd1_adj$p.one.way.lesser
reg.adj_ri.cln$M1.2[5] <- ri.wpd1_adj$N

reg.adj_ri.cln$M2.2[2] <- ri.wpd2_adj$ate
reg.adj_ri.cln$M2.2[3] <- ri.wpd2_adj$se
reg.adj_ri.cln$M2.2[4] <- ri.wpd2_adj$p.one.way.lesser
reg.adj_ri.cln$M2.2[5] <- ri.wpd2_adj$N

reg.adj_ri.cln$M1.3[2] <- ri.rank_m1_new$ate
reg.adj_ri.cln$M1.3[3] <- ri.rank_m1_new$se
reg.adj_ri.cln$M1.3[4] <- ri.rank_m1_new$p.one.way.lesser
reg.adj_ri.cln$M1.3[5] <- ri.rank_m1_new$N

reg.adj_ri.cln$M2.3[2] <- ri.rank_m2_new$ate
reg.adj_ri.cln$M2.3[3] <- ri.rank_m2_new$se
reg.adj_ri.cln$M2.3[4] <- ri.rank_m2_new$p.one.way.lesser
reg.adj_ri.cln$M2.3[5] <- ri.rank_m2_new$N

reg.adj_ri.cln[2,] <- round(as.numeric(reg.adj_ri.cln[2,]), 3)
reg.adj_ri.cln[3,] <- round(as.numeric(reg.adj_ri.cln[3,]), 3)
reg.adj_ri.cln[4,] <- round(as.numeric(reg.adj_ri.cln[4,]), 3)
reg.adj_ri.cln[5,] <- round(as.numeric(reg.adj_ri.cln[5,]), 3)


colnames(reg.adj_ri.cln) <- c("Pile Size", "Pile Size", 
                              "Pile Cleaned", "Pile Cleaned",
                              "Pile Rank", "Pile Rank")
#Table J1
out1 <- list(reg.adj_ri.raw)
attr(out1, "message") <- c("Note: results calculated using raw waste pile size measurements.")
print(xtableList(out1, caption="RI Results, Primary Dependent Variables (Raw)"), caption.placement="top", digits=3)

#Table 1
out2 <- list(reg.adj_ri.cln)
attr(out2, "message") <- c("Note: results calculated using cleaned waste pile size measurements.")
print(xtableList(out2, caption="RI Results, Primary Dependent Variables (Cleaned)"), caption.placement="top", digits=3)


## Table 2 & J2: RI, Secondary Outcomes  -----
ri.cph3.analysis <- subsetB_dta

ri.uw1_m1 <- lm.ri(formula = m1.ea.uwa1~treat+p1.p2.monitoring+b.pile.area_m+lights.mean+road.density+div+area.km2+ls.pop_2016,
                   dta = ri.cph3.analysis,
                   treat.var = "treat",
                   clust_var = ri.cph3.analysis$zone.id,
                   m=length(unique(ri.cph3.analysis$zone.id))/2,
                   sims=10000)
ri.uw1_m2 <- lm.ri(formula = m1.ea.uwa2~treat+p1.p2.monitoring+b.pile.area_m+lights.mean+road.density+div+area.km2+ls.pop_2016,
                   dta = ri.cph3.analysis,
                   treat.var = "treat",
                   clust_var = ri.cph3.analysis$zone.id,
                   m=length(unique(ri.cph3.analysis$zone.id))/2,
                   sims=10000)

ri.uw2_m1 <- lm.ri(formula = m1.ea.uwa2~treat+p1.p2.monitoring+b.pile.area_m+lights.mean+road.density+div+area.km2+ls.pop_2016,
                   dta = ri.cph3.analysis,
                   treat.var = "treat",
                   clust_var = ri.cph3.analysis$zone.id,
                   m=length(unique(ri.cph3.analysis$zone.id))/2,
                   sims=10000)
ri.uw2_m2 <- lm.ri(formula = m2.ea.uwa2~treat+p1.p2.monitoring+b.pile.area_m+lights.mean+road.density+div+area.km2+ls.pop_2016,
                   dta = ri.cph3.analysis,
                   treat.var = "treat",
                   clust_var = ri.cph3.analysis$zone.id,
                   m=length(unique(ri.cph3.analysis$zone.id))/2,
                   sims=10000)

ri.po1_m1 <- lm.ri(formula = m1.ea.po1~treat+p1.p2.monitoring+b.pile.area_m+lights.mean+road.density+div+area.km2+ls.pop_2016,
                   dta = ri.cph3.analysis,
                   treat.var = "treat",
                   clust_var = ri.cph3.analysis$zone.id,
                   m=length(unique(ri.cph3.analysis$zone.id))/2,
                   sims=10000)
ri.po1_m2 <- lm.ri(formula = m2.ea.po1~treat+p1.p2.monitoring+b.pile.area_m+lights.mean+road.density+div+area.km2+ls.pop_2016,
                   dta = ri.cph3.analysis,
                   treat.var = "treat",
                   clust_var = ri.cph3.analysis$zone.id,
                   m=length(unique(ri.cph3.analysis$zone.id))/2,
                   sims=10000)

ri.po2_m1 <- lm.ri(formula = m1.ea.po2~treat+p1.p2.monitoring+b.pile.area_m+lights.mean+road.density+div+area.km2+ls.pop_2016,
                   dta = ri.cph3.analysis,
                   treat.var = "treat",
                   clust_var = ri.cph3.analysis$zone.id,
                   m=length(unique(ri.cph3.analysis$zone.id))/2,
                   sims=10000)
ri.po2_m2 <- lm.ri(formula = m2.ea.po2~treat+p1.p2.monitoring+b.pile.area_m+lights.mean+road.density+div+area.km2+ls.pop_2016,
                   dta = ri.cph3.analysis,
                   treat.var = "treat",
                   clust_var = ri.cph3.analysis$zone.id,
                   m=length(unique(ri.cph3.analysis$zone.id))/2,
                   sims=10000)

ri.brn1_m1 <- lm.ri(formula = m1.ea.brn1~treat+p1.p2.monitoring+b.pile.area_m+lights.mean+road.density+div+area.km2+ls.pop_2016,
                    dta = ri.cph3.analysis,
                    treat.var = "treat",
                    clust_var = ri.cph3.analysis$zone.id,
                    m=length(unique(ri.cph3.analysis$zone.id))/2,
                    sims=10000)
ri.brn1_m2 <- lm.ri(formula = m2.ea.brn1~treat+p1.p2.monitoring+b.pile.area_m+lights.mean+road.density+div+area.km2+ls.pop_2016,
                    dta = ri.cph3.analysis,
                    treat.var = "treat",
                    clust_var = ri.cph3.analysis$zone.id,
                    m=length(unique(ri.cph3.analysis$zone.id))/2,
                    sims=10000)

ri.brn2_m1 <- lm.ri(formula = m1.ea.brn2~treat+p1.p2.monitoring+b.pile.area_m+lights.mean+road.density+div+area.km2+ls.pop_2016,
                    dta = ri.cph3.analysis,
                    treat.var = "treat",
                    clust_var = ri.cph3.analysis$zone.id,
                    m=length(unique(ri.cph3.analysis$zone.id))/2,
                    sims=10000)
ri.brn2_m2 <- lm.ri(formula = m2.ea.brn2~treat+p1.p2.monitoring+b.pile.area_m+lights.mean+road.density+div+area.km2+ls.pop_2016,
                    dta = ri.cph3.analysis,
                    treat.var = "treat",
                    clust_var = ri.cph3.analysis$zone.id,
                    m=length(unique(ri.cph3.analysis$zone.id))/2,
                    sims=10000)

ri.uw1_m1.cln <- lm.ri(formula = m1.ea.uwa1.cln~treat+p1.p2.monitoring+b.pile.area_m+lights.mean+road.density+div+area.km2+ls.pop_2016,
                       dta = ri.cph3.analysis,
                       treat.var = "treat",
                       clust_var = ri.cph3.analysis$zone.id,
                       m=length(unique(ri.cph3.analysis$zone.id))/2,
                       sims=10000)
ri.uw1_m2.cln <- lm.ri(formula = m1.ea.uwa2.cln~treat+p1.p2.monitoring+b.pile.area_m+lights.mean+road.density+div+area.km2+ls.pop_2016,
                       dta = ri.cph3.analysis,
                       treat.var = "treat",
                       clust_var = ri.cph3.analysis$zone.id,
                       m=length(unique(ri.cph3.analysis$zone.id))/2,
                       sims=10000)

ri.uw2_m1.cln <- lm.ri(formula = m1.ea.uwa2.cln~treat+p1.p2.monitoring+b.pile.area_m+lights.mean+road.density+div+area.km2+ls.pop_2016,
                       dta = ri.cph3.analysis,
                       treat.var = "treat",
                       clust_var = ri.cph3.analysis$zone.id,
                       m=length(unique(ri.cph3.analysis$zone.id))/2,
                       sims=10000)
ri.uw2_m2.cln <- lm.ri(formula = m2.ea.uwa2.cln~treat+p1.p2.monitoring+b.pile.area_m+lights.mean+road.density+div+area.km2+ls.pop_2016,
                       dta = ri.cph3.analysis,
                       treat.var = "treat",
                       clust_var = ri.cph3.analysis$zone.id,
                       m=length(unique(ri.cph3.analysis$zone.id))/2,
                       sims=10000)

ri.po1_m1.cln <- lm.ri(formula = m1.ea.po1.cln~treat+p1.p2.monitoring+b.pile.area_m+lights.mean+road.density+div+area.km2+ls.pop_2016,
                       dta = ri.cph3.analysis,
                       treat.var = "treat",
                       clust_var = ri.cph3.analysis$zone.id,
                       m=length(unique(ri.cph3.analysis$zone.id))/2,
                       sims=10000)
ri.po1_m2.cln <- lm.ri(formula = m2.ea.po1.cln~treat+p1.p2.monitoring+b.pile.area_m+lights.mean+road.density+div+area.km2+ls.pop_2016,
                       dta = ri.cph3.analysis,
                       treat.var = "treat",
                       clust_var = ri.cph3.analysis$zone.id,
                       m=length(unique(ri.cph3.analysis$zone.id))/2,
                       sims=10000)

ri.po2_m1.cln <- lm.ri(formula = m1.ea.po2.cln~treat+p1.p2.monitoring+b.pile.area_m+lights.mean+road.density+div+area.km2+ls.pop_2016,
                       dta = ri.cph3.analysis,
                       treat.var = "treat",
                       clust_var = ri.cph3.analysis$zone.id,
                       m=length(unique(ri.cph3.analysis$zone.id))/2,
                       sims=10000)
ri.po2_m2.cln <- lm.ri(formula = m2.ea.po2.cln~treat+p1.p2.monitoring+b.pile.area_m+lights.mean+road.density+div+area.km2+ls.pop_2016,
                       dta = ri.cph3.analysis,
                       treat.var = "treat",
                       clust_var = ri.cph3.analysis$zone.id,
                       m=length(unique(ri.cph3.analysis$zone.id))/2,
                       sims=10000)

ri.brn1_m1.cln <- lm.ri(formula = m1.ea.brn1.cln~treat+p1.p2.monitoring+b.pile.area_m+lights.mean+road.density+div+area.km2+ls.pop_2016,
                        dta = ri.cph3.analysis,
                        treat.var = "treat",
                        clust_var = ri.cph3.analysis$zone.id,
                        m=length(unique(ri.cph3.analysis$zone.id))/2,
                        sims=10000)
ri.brn1_m2.cln <- lm.ri(formula = m2.ea.brn1.cln~treat+p1.p2.monitoring+b.pile.area_m+lights.mean+road.density+div+area.km2+ls.pop_2016,
                        dta = ri.cph3.analysis,
                        treat.var = "treat",
                        clust_var = ri.cph3.analysis$zone.id,
                        m=length(unique(ri.cph3.analysis$zone.id))/2,
                        sims=10000)

ri.brn2_m1.cln <- lm.ri(formula = m1.ea.brn2.cln~treat+p1.p2.monitoring+b.pile.area_m+lights.mean+road.density+div+area.km2+ls.pop_2016,
                        dta = ri.cph3.analysis,
                        treat.var = "treat",
                        clust_var = ri.cph3.analysis$zone.id,
                        m=length(unique(ri.cph3.analysis$zone.id))/2,
                        sims=10000)
ri.brn2_m2.cln <- lm.ri(formula = m2.ea.brn2.cln~treat+p1.p2.monitoring+b.pile.area_m+lights.mean+road.density+div+area.km2+ls.pop_2016,
                        dta = ri.cph3.analysis,
                        treat.var = "treat",
                        clust_var = ri.cph3.analysis$zone.id,
                        m=length(unique(ri.cph3.analysis$zone.id))/2,
                        sims=10000)

reg.adj_ri.alt.dvs_m1 <- data.frame(matrix(nrow=5, ncol=6))
rownames(reg.adj_ri.alt.dvs_m1) <- c("Variable Specification", "Treatment Effect","Standard Error", "p-value", "N")
colnames(reg.adj_ri.alt.dvs_m1) <- c("M1.1", "M2.1", "M1.2", "M2.2", "M1.3", "M2.3")

reg.adj_ri.alt.dvs_m1$M1.1[1] <- "A"
reg.adj_ri.alt.dvs_m1$M1.2[1] <- "A"
reg.adj_ri.alt.dvs_m1$M1.3[1] <- "A"
reg.adj_ri.alt.dvs_m1$M2.1[1] <- "B"
reg.adj_ri.alt.dvs_m1$M2.2[1] <- "B"
reg.adj_ri.alt.dvs_m1$M2.3[1] <- "B"

reg.adj_ri.alt.dvs_m1$M1.1[2] <- ri.uw1_m1$ate
reg.adj_ri.alt.dvs_m1$M1.1[3] <- ri.uw1_m1$se
reg.adj_ri.alt.dvs_m1$M1.1[4] <- ri.uw1_m1$p.one.way.lesser
reg.adj_ri.alt.dvs_m1$M1.1[5] <- ri.uw1_m1$N

reg.adj_ri.alt.dvs_m1$M2.1[2] <- ri.uw2_m1$ate
reg.adj_ri.alt.dvs_m1$M2.1[3] <- ri.uw2_m1$se
reg.adj_ri.alt.dvs_m1$M2.1[4] <- ri.uw2_m1$p.one.way.lesser
reg.adj_ri.alt.dvs_m1$M2.1[5] <- ri.uw2_m1$N

reg.adj_ri.alt.dvs_m1$M1.2[2] <- ri.po1_m1$ate
reg.adj_ri.alt.dvs_m1$M1.2[3] <- ri.po1_m1$se
reg.adj_ri.alt.dvs_m1$M1.2[4] <- ri.po1_m1$p.one.way.lesser
reg.adj_ri.alt.dvs_m1$M1.2[5] <- ri.po1_m1$N

reg.adj_ri.alt.dvs_m1$M2.2[2] <- ri.po2_m1$ate
reg.adj_ri.alt.dvs_m1$M2.2[3] <- ri.po2_m1$se
reg.adj_ri.alt.dvs_m1$M2.2[4] <- ri.po2_m1$p.one.way.lesser
reg.adj_ri.alt.dvs_m1$M2.2[5] <- ri.po2_m1$N

reg.adj_ri.alt.dvs_m1$M1.3[2] <- ri.brn1_m1$ate
reg.adj_ri.alt.dvs_m1$M1.3[3] <- ri.brn1_m1$se
reg.adj_ri.alt.dvs_m1$M1.3[4] <- ri.brn1_m1$p.one.way.lesser
reg.adj_ri.alt.dvs_m1$M1.3[5] <- ri.brn1_m1$N

reg.adj_ri.alt.dvs_m1$M2.3[2] <- ri.brn2_m1$ate
reg.adj_ri.alt.dvs_m1$M2.3[3] <- ri.brn2_m1$se
reg.adj_ri.alt.dvs_m1$M2.3[4] <- ri.brn2_m1$p.one.way.lesser
reg.adj_ri.alt.dvs_m1$M2.3[5] <- ri.brn2_m1$N

reg.adj_ri.alt.dvs_m1[2,] <- round(as.numeric(reg.adj_ri.alt.dvs_m1[2,]), 3)
reg.adj_ri.alt.dvs_m1[3,] <- round(as.numeric(reg.adj_ri.alt.dvs_m1[3,]), 3)
reg.adj_ri.alt.dvs_m1[4,] <- round(as.numeric(reg.adj_ri.alt.dvs_m1[4,]), 3)
reg.adj_ri.alt.dvs_m1[5,] <- round(as.numeric(reg.adj_ri.alt.dvs_m1[5,]), 3)


colnames(reg.adj_ri.alt.dvs_m1) <- c("Uncontained", "Uncontained",
                                     "Disorganized", "Disorganized",
                                     "Burnt", "Burnt")

reg.adj_ri.alt.dvs.cln_m1 <- data.frame(matrix(nrow=5, ncol=6))
rownames(reg.adj_ri.alt.dvs.cln_m1) <- c("Variable Specification", "Treatment Effect","Standard Error", "p-value", "N")
colnames(reg.adj_ri.alt.dvs.cln_m1) <- c("M1.1", "M2.1", "M1.2", "M2.2", "M1.3", "M2.3")

reg.adj_ri.alt.dvs.cln_m1$M1.1[1] <- "A"
reg.adj_ri.alt.dvs.cln_m1$M1.2[1] <- "A"
reg.adj_ri.alt.dvs.cln_m1$M1.3[1] <- "A"
reg.adj_ri.alt.dvs.cln_m1$M2.1[1] <- "B"
reg.adj_ri.alt.dvs.cln_m1$M2.2[1] <- "B"
reg.adj_ri.alt.dvs.cln_m1$M2.3[1] <- "B"

reg.adj_ri.alt.dvs.cln_m1$M1.1[2] <- ri.uw1_m1.cln$ate
reg.adj_ri.alt.dvs.cln_m1$M1.1[3] <- ri.uw1_m1.cln$se
reg.adj_ri.alt.dvs.cln_m1$M1.1[4] <- ri.uw1_m1.cln$p.one.way.lesser
reg.adj_ri.alt.dvs.cln_m1$M1.1[5] <- ri.uw1_m1.cln$N

reg.adj_ri.alt.dvs.cln_m1$M2.1[2] <- ri.uw2_m1.cln$ate
reg.adj_ri.alt.dvs.cln_m1$M2.1[3] <- ri.uw2_m1.cln$se
reg.adj_ri.alt.dvs.cln_m1$M2.1[4] <- ri.uw2_m1.cln$p.one.way.lesser
reg.adj_ri.alt.dvs.cln_m1$M2.1[5] <- ri.uw2_m1.cln$N

reg.adj_ri.alt.dvs.cln_m1$M1.2[2] <- ri.po1_m1.cln$ate
reg.adj_ri.alt.dvs.cln_m1$M1.2[3] <- ri.po1_m1.cln$se
reg.adj_ri.alt.dvs.cln_m1$M1.2[4] <- ri.po1_m1.cln$p.one.way.lesser
reg.adj_ri.alt.dvs.cln_m1$M1.2[5] <- ri.po1_m1.cln$N

reg.adj_ri.alt.dvs.cln_m1$M2.2[2] <- ri.po2_m1.cln$ate
reg.adj_ri.alt.dvs.cln_m1$M2.2[3] <- ri.po2_m1.cln$se
reg.adj_ri.alt.dvs.cln_m1$M2.2[4] <- ri.po2_m1.cln$p.one.way.lesser
reg.adj_ri.alt.dvs.cln_m1$M2.2[5] <- ri.po2_m1.cln$N

reg.adj_ri.alt.dvs.cln_m1$M1.3[2] <- ri.brn1_m1.cln$ate
reg.adj_ri.alt.dvs.cln_m1$M1.3[3] <- ri.brn1_m1.cln$se
reg.adj_ri.alt.dvs.cln_m1$M1.3[4] <- ri.brn1_m1.cln$p.one.way.lesser
reg.adj_ri.alt.dvs.cln_m1$M1.3[5] <- ri.brn1_m1.cln$N

reg.adj_ri.alt.dvs.cln_m1$M2.3[2] <- ri.brn2_m1.cln$ate
reg.adj_ri.alt.dvs.cln_m1$M2.3[3] <- ri.brn2_m1.cln$se
reg.adj_ri.alt.dvs.cln_m1$M2.3[4] <- ri.brn2_m1.cln$p.one.way.lesser
reg.adj_ri.alt.dvs.cln_m1$M2.3[5] <- ri.brn2_m1.cln$N

reg.adj_ri.alt.dvs.cln_m1[2,] <- round(as.numeric(reg.adj_ri.alt.dvs.cln_m1[2,]),3)
reg.adj_ri.alt.dvs.cln_m1[3,] <- round(as.numeric(reg.adj_ri.alt.dvs.cln_m1[3,]),3)
reg.adj_ri.alt.dvs.cln_m1[4,] <- round(as.numeric(reg.adj_ri.alt.dvs.cln_m1[4,]),3)
reg.adj_ri.alt.dvs.cln_m1[5,] <- round(as.numeric(reg.adj_ri.alt.dvs.cln_m1[5,]),3)

#Benjamini–Hochberg–Yekutieli correction, as pre-specified
reg.adj_ri.alt.dvs.cln_m1[6,] <- round(p.adjust(reg.adj_ri.alt.dvs.cln_m1[4,], method="BH"), 3)

reg.adj_ri.alt.dvs.cln_m1 <- reg.adj_ri.alt.dvs.cln_m1[c(1,2,3,4,6,5),]
row.names(reg.adj_ri.alt.dvs.cln_m1)[5] <- "BHY p-value"

colnames(reg.adj_ri.alt.dvs.cln_m1) <- c("Uncontained", "Uncontained",
                                         "Disorganized", "Disorganized",
                                         "Burnt", "Burnt")

#Table J2a
out3 <- list(reg.adj_ri.alt.dvs_m1)
attr(out3, "message") <- c("Note: results calculated using raw waste pile size measurements.")
print(xtableList(out3, caption="RI Results, Midline 1, Secondary Dependent Variables (Raw)"), digits=3, caption.placement="top")

#Table 2a
out4 <- list(reg.adj_ri.alt.dvs.cln_m1)
attr(out4, "message") <- c("Note: results calculated using cleaned waste pile size measurements.")
print(xtableList(out4, caption="RI Results, Midline 1, Secondary Dependent Variables (Cleaned)"), digits=3, caption.placement="top")


reg.adj_ri.alt.dvs_m2 <- data.frame(matrix(nrow=5, ncol=6))
rownames(reg.adj_ri.alt.dvs_m2) <- c("Variable Specification", "Treatment Effect","Standard Error", "p-value", "N")
colnames(reg.adj_ri.alt.dvs_m2) <- c("M1.1", "M2.1", "M1.2", "M2.2", "M1.3", "M2.3")

reg.adj_ri.alt.dvs_m2$M1.1[1] <- "A"
reg.adj_ri.alt.dvs_m2$M1.2[1] <- "A"
reg.adj_ri.alt.dvs_m2$M1.3[1] <- "A"
reg.adj_ri.alt.dvs_m2$M2.1[1] <- "B"
reg.adj_ri.alt.dvs_m2$M2.2[1] <- "B"
reg.adj_ri.alt.dvs_m2$M2.3[1] <- "B"

reg.adj_ri.alt.dvs_m2$M1.1[2] <- ri.uw1_m2$ate
reg.adj_ri.alt.dvs_m2$M1.1[3] <- ri.uw1_m2$se
reg.adj_ri.alt.dvs_m2$M1.1[4] <- ri.uw1_m2$p.one.way.lesser
reg.adj_ri.alt.dvs_m2$M1.1[5] <- ri.uw1_m2$N

reg.adj_ri.alt.dvs_m2$M2.1[2] <- ri.uw2_m2$ate
reg.adj_ri.alt.dvs_m2$M2.1[3] <- ri.uw2_m2$se
reg.adj_ri.alt.dvs_m2$M2.1[4] <- ri.uw2_m2$p.one.way.lesser
reg.adj_ri.alt.dvs_m2$M2.1[5] <- ri.uw2_m2$N

reg.adj_ri.alt.dvs_m2$M1.2[2] <- ri.po1_m2$ate
reg.adj_ri.alt.dvs_m2$M1.2[3] <- ri.po1_m2$se
reg.adj_ri.alt.dvs_m2$M1.2[4] <- ri.po1_m2$p.one.way.lesser
reg.adj_ri.alt.dvs_m2$M1.2[5] <- ri.po1_m2$N

reg.adj_ri.alt.dvs_m2$M2.2[2] <- ri.po2_m2$ate
reg.adj_ri.alt.dvs_m2$M2.2[3] <- ri.po2_m2$se
reg.adj_ri.alt.dvs_m2$M2.2[4] <- ri.po2_m2$p.one.way.lesser
reg.adj_ri.alt.dvs_m2$M2.2[5] <- ri.po2_m2$N

reg.adj_ri.alt.dvs_m2$M1.3[2] <- ri.brn1_m2$ate
reg.adj_ri.alt.dvs_m2$M1.3[3] <- ri.brn1_m2$se
reg.adj_ri.alt.dvs_m2$M1.3[4] <- ri.brn1_m2$p.one.way.lesser
reg.adj_ri.alt.dvs_m2$M1.3[5] <- ri.brn1_m2$N

reg.adj_ri.alt.dvs_m2$M2.3[2] <- ri.brn2_m2$ate
reg.adj_ri.alt.dvs_m2$M2.3[3] <- ri.brn2_m2$se
reg.adj_ri.alt.dvs_m2$M2.3[4] <- ri.brn2_m2$p.one.way.lesser
reg.adj_ri.alt.dvs_m2$M2.3[5] <- ri.brn2_m2$N

reg.adj_ri.alt.dvs_m2[2,] <- round(as.numeric(reg.adj_ri.alt.dvs_m2[2,]), 3)
reg.adj_ri.alt.dvs_m2[3,] <- round(as.numeric(reg.adj_ri.alt.dvs_m2[3,]), 3)
reg.adj_ri.alt.dvs_m2[4,] <- round(as.numeric(reg.adj_ri.alt.dvs_m2[4,]), 3)
reg.adj_ri.alt.dvs_m2[5,] <- round(as.numeric(reg.adj_ri.alt.dvs_m2[5,]), 3)


colnames(reg.adj_ri.alt.dvs_m2) <- c("Uncontained", "Uncontained",
                                     "Disorganized", "Disorganized",
                                     "Burnt", "Burnt")

reg.adj_ri.alt.dvs.cln_m2 <- data.frame(matrix(nrow=5, ncol=6))
rownames(reg.adj_ri.alt.dvs.cln_m2) <- c("Variable Specification", "Treatment Effect","Standard Error", "p-value", "N")
colnames(reg.adj_ri.alt.dvs.cln_m2) <- c("M1.1", "M2.1", "M1.2", "M2.2", "M1.3", "M2.3")

reg.adj_ri.alt.dvs.cln_m2$M1.1[1] <- "A"
reg.adj_ri.alt.dvs.cln_m2$M1.2[1] <- "A"
reg.adj_ri.alt.dvs.cln_m2$M1.3[1] <- "A"
reg.adj_ri.alt.dvs.cln_m2$M2.1[1] <- "B"
reg.adj_ri.alt.dvs.cln_m2$M2.2[1] <- "B"
reg.adj_ri.alt.dvs.cln_m2$M2.3[1] <- "B"

reg.adj_ri.alt.dvs.cln_m2$M1.1[2] <- ri.uw1_m2.cln$ate
reg.adj_ri.alt.dvs.cln_m2$M1.1[3] <- ri.uw1_m2.cln$se
reg.adj_ri.alt.dvs.cln_m2$M1.1[4] <- ri.uw1_m2.cln$p.one.way.lesser
reg.adj_ri.alt.dvs.cln_m2$M1.1[5] <- ri.uw1_m2.cln$N

reg.adj_ri.alt.dvs.cln_m2$M2.1[2] <- ri.uw2_m2.cln$ate
reg.adj_ri.alt.dvs.cln_m2$M2.1[3] <- ri.uw2_m2.cln$se
reg.adj_ri.alt.dvs.cln_m2$M2.1[4] <- ri.uw2_m2.cln$p.one.way.lesser
reg.adj_ri.alt.dvs.cln_m2$M2.1[5] <- ri.uw2_m2.cln$N

reg.adj_ri.alt.dvs.cln_m2$M1.2[2] <- ri.po1_m2.cln$ate
reg.adj_ri.alt.dvs.cln_m2$M1.2[3] <- ri.po1_m2.cln$se
reg.adj_ri.alt.dvs.cln_m2$M1.2[4] <- ri.po1_m2.cln$p.one.way.lesser
reg.adj_ri.alt.dvs.cln_m2$M1.2[5] <- ri.po1_m2.cln$N

reg.adj_ri.alt.dvs.cln_m2$M2.2[2] <- ri.po2_m2.cln$ate
reg.adj_ri.alt.dvs.cln_m2$M2.2[3] <- ri.po2_m2.cln$se
reg.adj_ri.alt.dvs.cln_m2$M2.2[4] <- ri.po2_m2.cln$p.one.way.lesser
reg.adj_ri.alt.dvs.cln_m2$M2.2[5] <- ri.po2_m2.cln$N

reg.adj_ri.alt.dvs.cln_m2$M1.3[2] <- ri.brn1_m2.cln$ate
reg.adj_ri.alt.dvs.cln_m2$M1.3[3] <- ri.brn1_m2.cln$se
reg.adj_ri.alt.dvs.cln_m2$M1.3[4] <- ri.brn1_m2.cln$p.one.way.lesser
reg.adj_ri.alt.dvs.cln_m2$M1.3[5] <- ri.brn1_m2.cln$N

reg.adj_ri.alt.dvs.cln_m2$M2.3[2] <- ri.brn2_m2.cln$ate
reg.adj_ri.alt.dvs.cln_m2$M2.3[3] <- ri.brn2_m2.cln$se
reg.adj_ri.alt.dvs.cln_m2$M2.3[4] <- ri.brn2_m2.cln$p.one.way.lesser
reg.adj_ri.alt.dvs.cln_m2$M2.3[5] <- ri.brn2_m2.cln$N

reg.adj_ri.alt.dvs.cln_m2[2,] <- round(as.numeric(reg.adj_ri.alt.dvs.cln_m2[2,]),3)
reg.adj_ri.alt.dvs.cln_m2[3,] <- round(as.numeric(reg.adj_ri.alt.dvs.cln_m2[3,]),3)
reg.adj_ri.alt.dvs.cln_m2[4,] <- round(as.numeric(reg.adj_ri.alt.dvs.cln_m2[4,]),3)
reg.adj_ri.alt.dvs.cln_m2[5,] <- round(as.numeric(reg.adj_ri.alt.dvs.cln_m2[5,]),3)

#Benjamini–Hochberg–Yekutieli correction, as pre-specified
reg.adj_ri.alt.dvs.cln_m2[6,] <- round(p.adjust(reg.adj_ri.alt.dvs.cln_m2[4,], method="BH"),3)

reg.adj_ri.alt.dvs.cln_m2 <- reg.adj_ri.alt.dvs.cln_m2[c(1,2,3,4,6,5),]
row.names(reg.adj_ri.alt.dvs.cln_m2)[5] <- "BHY p-value"

colnames(reg.adj_ri.alt.dvs.cln_m2) <- c("Uncontained", "Uncontained",
                                         "Disorganized", "Disorganized",
                                         "Burnt", "Burnt")

#Table J2b
out3 <- list(reg.adj_ri.alt.dvs_m2)
attr(out3, "message") <- c("Note: results calculated using raw waste pile size measurements.")
print(xtableList(out3, caption="RI Results, Midline 2, Secondary Dependent Variables (Raw)"), digits=3, caption.placement="top")

#Table 2b
out4 <- list(reg.adj_ri.alt.dvs.cln_m2)
attr(out4, "message") <- c("Note: results calculated using cleaned waste pile size measurements.")
print(xtableList(out4, caption="RI Results, Midline 2, Secondary Dependent Variables (Cleaned)"), digits=3, caption.placement="top")


## Table F1: Scalar Values, Alternative Dependent Variables -----
main_dta$m1.ea.uwa1.p <- NA
main_dta$m1.ea.uwa1.p <- ifelse(main_dta$m1.waste.pile_d == "No",
                                    0.0, main_dta$m1.ea.uwa1)
main_dta$m1.ea.uwa1.p <- ifelse(main_dta$m1.waste.stor == "All of the rubbish is neatly contained with sacks or other containers",
                                    0.0, main_dta$m1.ea.uwa1)
main_dta$m1.ea.uwa1.p <- ifelse(main_dta$m1.waste.stor == "Most of the rubbish is organized in sacks or other containers",
                                    0.333, main_dta$m1.ea.uwa1)
main_dta$m1.ea.uwa1.p <- ifelse(main_dta$m1.waste.stor == "Very little rubbish is contained within sacks or containers",
                                    0.667, main_dta$m1.ea.uwa1)
main_dta$m1.ea.uwa1.p <- ifelse(main_dta$m1.waste.stor == "No rubbish is contained in sacks or containers",
                                    1.0, main_dta$m1.ea.uwa1)
tab_a <- data.frame(Characteristic=c(rep("Waste Containment", 5), rep("Waste Organization", 5), rep("Waste Burning", 4)),
                    `Abbreviated Response`=c("Pile Cleaned", "Fully Contained", "Mostly Contained", "Mostly Uncontained", "Uncontained",
                                             "Pile Cleaned", "Fully Organized", "Mostly Organized", "Mostly Unorganized", "Unorganized",
                                             "Pile Cleaned", "No Burning", "Less than 50% burnt", "More than 50% burnt"),
                    Scalar=c(0, 0, 0.333, 0.667, 1,
                             0, 0, 0.333, 0.667, 1, 
                             0, 0, 0.45, 0.55))

tab_b <- data.frame(Characteristic=c(rep("Waste Containment", 5), rep("Waste Organization", 5), rep("Waste Burning", 4)),
                    `Abbreviated Response`=c("Pile Cleaned", "Fully Contained", "Mostly Contained", "Mostly Uncontained", "Uncontained",
                                             "Pile Cleaned", "Fully Organized", "Mostly Organized", "Mostly Unorganized", "Unorganized",
                                             "Pile Cleaned", "No Burning", "Less than 50% burnt", "More than 50% burnt"),
                    Scalar=c(0, 0, 0.25, 0.75, 1,
                             0, 0, 0.25, 0.75, 1, 
                             0, 0, 0.33, 0.667))                            

print(xtable(tab_a, "Scalar Values, Variable Specification A"), include.rownames=F)                               
print(xtable(tab_b, "Scalar Values, Variable Specification B"), include.rownames=F)                               

## Table I1: Spillover Results, Primary Dependent Variables (Cleaned) -----
cph3.analysis_spill <- subset(subsetB_dta, indirect.prob!=0) #All zones potentially affected by indirect Ph3 spillover

cph3.analysis_spill$realized.prob <- NA
cph3.analysis_spill$realized.prob <- ifelse(cph3.analysis_spill$treat==1 & cph3.analysis_spill$indirect==1, 
                                            cph3.analysis_spill$d1i1, cph3.analysis_spill$realized.prob)
cph3.analysis_spill$realized.prob <- ifelse(cph3.analysis_spill$treat==1 & cph3.analysis_spill$indirect==0, 
                                            cph3.analysis_spill$d1i0, cph3.analysis_spill$realized.prob)
cph3.analysis_spill$realized.prob <- ifelse(cph3.analysis_spill$treat==0 & cph3.analysis_spill$indirect==1, 
                                            cph3.analysis_spill$d0i1, cph3.analysis_spill$realized.prob)
cph3.analysis_spill$realized.prob <- ifelse(cph3.analysis_spill$treat==0 & cph3.analysis_spill$indirect==0, 
                                            cph3.analysis_spill$d0i0, cph3.analysis_spill$realized.prob)

cph3.analysis_spill$spillover <- NA
cph3.analysis_spill$spillover <- paste("d", cph3.analysis_spill$treat, "i", cph3.analysis_spill$indirect, sep="")
table(cph3.analysis_spill$spillover, useNA="always")
cph3.analysis_spill$spillover <- ordered(cph3.analysis_spill$spillover, levels=c("d0i0", "d0i1", "d1i0", "d1i1"))
table(cph3.analysis_spill$spillover, useNA="always")

#Size
spill.m1_area <- lm(m1.size_final~spillover+p1.p2.monitoring+b.pile.area_m+lights.mean+road.density+area.km2+div+ls.pop_2016, 
                    data = cph3.analysis_spill, weights=1/realized.prob)
summary(spill.m1_area)
spill.m1_area.c <- lm.cluster(spill.m1_area, cph3.analysis_spill$zone.id)


spill.m2_area <- lm(m2.size_final~spillover+p1.p2.monitoring+b.pile.area_m+lights.mean+road.density+area.km2+div+ls.pop_2016, 
                    data = cph3.analysis_spill, weights=1/realized.prob)
summary(spill.m2_area)
spill.m2_area.c <- lm.cluster(spill.m2_area, cph3.analysis_spill$zone.id)

#Dummy
spill.m1_d <- lm(m1.waste.pile_d~spillover+p1.p2.monitoring+lights.mean+road.density+area.km2+div+ls.pop_2016, 
                 data = cph3.analysis_spill, weights=1/realized.prob)
spill.m1_d.c <- lm.cluster(spill.m1_d, cph3.analysis_spill$zone.id)

spill.m2_d <- lm(m2.waste.pile_d~spillover+p1.p2.monitoring+lights.mean+road.density+area.km2+div+ls.pop_2016, 
                 data = cph3.analysis_spill, weights=1/realized.prob)
spill.m2_d.c <- lm.cluster(spill.m2_d, cph3.analysis_spill$zone.id)

#Rank, re-ranked for subset
cph3.analysis_spill$rank.b_new2 <- rank(cph3.analysis_spill$b.pile.area_m, na.last = "keep")
cph3.analysis_spill$rank.m1_new2 <- rank(cph3.analysis_spill$m1.size_final, na.last ="keep")
cph3.analysis_spill$rank.m2_new2 <- rank(cph3.analysis_spill$m2.size_final, na.last ="keep")

spill.m1_rank <- lm(rank.m1_new2~spillover+p1.p2.monitoring+rank.b_new2+lights.mean+road.density+area.km2+div+ls.pop_2016, 
                    data = cph3.analysis_spill, weights=1/realized.prob)
spill.m1_rank.c <- lm.cluster(spill.m1_rank, cph3.analysis_spill$zone.id)

spill.m2_rank <- lm(rank.m2_new2~spillover+p1.p2.monitoring+rank.b_new2+lights.mean+road.density+area.km2+div+ls.pop_2016, 
                    data = cph3.analysis_spill, weights=1/realized.prob)
spill.m2_rank.c <- lm.cluster(spill.m2_rank, cph3.analysis_spill$zone.id)

stargazer(spill.m1_d, spill.m1_area, spill.m1_rank, spill.m2_d, spill.m2_area, spill.m2_rank, 
          dep.var.caption = "DV: Cleaned (0: Yes, 1: No), Pile Size ($m^{2}$), Pile Size Rank",
          dep.var.labels = c("M1 Cleaned","M1 Size","M1 Rank","M2 Cleaned","M2 Size","M2 Rank"),
          covariate.labels = c("Control, Indirect", "Treated, No Indirect","Treated, Indirect"),
          omit = c("p1.p2.monitoring", "b.pile.area_m", "rank.b", "lights.mean", "road.density", "area.km2", "div", "ls.pop_2016"),
          p = list(spill.m1_d.c[[1]][c(2:4,1),4], spill.m1_area.c[[1]][c(2:4,1),4], spill.m1_rank.c[[1]][c(2:4,1),4],
                   spill.m2_d.c[[1]][c(2:4,1),4], spill.m2_area.c[[1]][c(2:4,1),4], spill.m2_rank.c[[1]][c(2:4,1),4]),
          se = list(spill.m1_d.c[[1]][c(2:4,1),2], spill.m1_area.c[[1]][c(2:4,1),2], spill.m1_rank.c[[1]][c(2:4,1),2],
                    spill.m2_d.c[[1]][c(2:4,1),2], spill.m2_area.c[[1]][c(2:4,1),2], spill.m2_rank.c[[1]][c(2:4,1),2]),
          add.lines = list(c("Covariates", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes")),
          df = FALSE, 
          notes.label = "Notes: two-tailed tests; weighted by inverse probability of assignment to exposure type; baseline is [control, no indirect]; rank variables are specific to the subset. For a full set of variable descriptions, see notes in Table \ref{tab:main-ate.cln}.",
          omit.stat = c("rsq","ser","adj.rsq"),
          intercept.bottom = TRUE)

## Table H2: No Spillover Results, Primary Dependent Variables (Cleaned) -----
cph3.analysis_nspill <- subset(subsetB_dta, indirect.prob==0) #All zones not affected by indirect Ph3 spillover
nspill.m1_area <- lm(m1.size_final~treat+p1.p2.monitoring+b.pile.area_m+lights.mean+road.density+area.km2+div+ls.pop_2016, 
                     data = cph3.analysis_nspill)
summary(nspill.m1_area)
nspill.m1_area.c <- lm.cluster(nspill.m1_area, cph3.analysis_nspill$zone.id)


nspill.m2_area <- lm(m2.size_final~treat+p1.p2.monitoring+b.pile.area_m+lights.mean+road.density+area.km2+div+ls.pop_2016, 
                     data = cph3.analysis_nspill)
summary(nspill.m2_area)
nspill.m2_area.c <- lm.cluster(nspill.m2_area, cph3.analysis_nspill$zone.id)

#Dummy
nspill.m1_d <- lm(m1.waste.pile_d~treat+p1.p2.monitoring+lights.mean+road.density+area.km2+div+ls.pop_2016, 
                  data = cph3.analysis_nspill)
nspill.m1_d.c <- lm.cluster(nspill.m1_d, cph3.analysis_nspill$zone.id)

nspill.m2_d <- lm(m2.waste.pile_d~treat+p1.p2.monitoring+lights.mean+road.density+area.km2+div+ls.pop_2016, 
                  data = cph3.analysis_nspill)
nspill.m2_d.c <- lm.cluster(nspill.m2_d, cph3.analysis_nspill$zone.id)

#Rank, re-ranked for subset
cph3.analysis_nspill$rank.b_new2 <- rank(cph3.analysis_nspill$b.pile.area_m, na.last = "keep")
cph3.analysis_nspill$rank.m1_new2 <- rank(cph3.analysis_nspill$m1.size_final, na.last ="keep")
cph3.analysis_nspill$rank.m2_new2 <- rank(cph3.analysis_nspill$m2.size_final, na.last ="keep")

nspill.m1_rank <- lm(rank.m1_new2~treat+p1.p2.monitoring+rank.b_new2+lights.mean+road.density+area.km2+div+ls.pop_2016, 
                     data = cph3.analysis_nspill)
nspill.m1_rank.c <- lm.cluster(nspill.m1_rank, cph3.analysis_nspill$zone.id)

nspill.m2_rank <- lm(rank.m2_new2~treat+p1.p2.monitoring+rank.b_new2+lights.mean+road.density+area.km2+div+ls.pop_2016, 
                     data = cph3.analysis_nspill)
nspill.m2_rank.c <- lm.cluster(nspill.m2_rank, cph3.analysis_nspill$zone.id)

stargazer(nspill.m1_d, nspill.m1_area, nspill.m1_rank, nspill.m2_d, nspill.m2_area, nspill.m2_rank, 
          dep.var.caption = "DV: Cleaned (0: Yes, 1: No), Pile Size ($m^{2}$), Pile Size Rank",
          dep.var.labels = c("M1 Cleaned","M1 Size","M1 Rank","M2 Cleaned","M2 Size","M2 Rank"),
          covariate.labels = c("Treatment", "P1/P2 Monitoring"),
          omit = c("p1.p2.monitoring", "b.pile.area_m", "rank.b", "lights.mean", "road.density", "area.km2", "div", "ls.pop_2016"),
          p = list(nspill.m1_d.c[[1]][c(2:4,1),4], nspill.m1_area.c[[1]][c(2:4,1),4], nspill.m1_rank.c[[1]][c(2:4,1),4],
                   nspill.m2_d.c[[1]][c(2:4,1),4], nspill.m2_area.c[[1]][c(2:4,1),4], nspill.m2_rank.c[[1]][c(2:4,1),4]),
          se = list(nspill.m1_d.c[[1]][c(2:4,1),2], nspill.m1_area.c[[1]][c(2:4,1),2], nspill.m2_rank.c[[1]][c(2:4,1),2],
                    nspill.m2_d.c[[1]][c(2:4,1),2], nspill.m2_area.c[[1]][c(2:4,1),2], nspill.m2_rank.c[[1]][c(2:4,1),2]),
          add.lines = list(c("Covariates", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes")),
          df = FALSE, 
          notes.label = "Notes: two-tailed tests; rank variables are specific to the subset. For a full set of variable descriptions, see notes in Table \ref{tab:main-ate.cln}.",
          omit.stat = c("rsq","ser","adj.rsq"),
          intercept.bottom = TRUE)

## Table J3: HTE, Party ----
subsetB_dta$Party <- as.character(subsetB_dta$Party)
subsetB_dta$party.type <- ifelse(subsetB_dta$Party!="NRM" & subsetB_dta$Party!="INDEPENDENT", "OPPOSITION", subsetB_dta$Party)
subsetB_dta$party.type <- factor(subsetB_dta$party.type, levels=c("NRM","INDEPENDENT","OPPOSITION"))

##Binary
lm.wpd1_p <- lm(m1.waste.pile_d~treat*party.type+p1.p2.monitoring+lights.mean+road.density+area.km2+div, data = subsetB_dta) #Changed from _all in main file
summary(lm.wpd1_p)
lm.wpd1_pc <- lm.cluster(lm.wpd1_p, subsetB_dta$zone.id)

lm.wpd2_p <- lm(m2.waste.pile_d~treat*party.type+p1.p2.monitoring+lights.mean+road.density+area.km2+div, data = subsetB_dta) #Changed from _all in main file
summary(lm.wpd2_p)
lm.wpd2_pc <- lm.cluster(lm.wpd2_p, subsetB_dta$zone.id)

## Pile Size
lm.area1_p <- lm(m1.size_final~treat*party.type+p1.p2.monitoring+lights.mean+road.density+area.km2+b.pile.area_m+div, data = subsetB_dta) #Changed from _all in main file
summary(lm.area1_p)
lm.area1_pc <- lm.cluster(lm.area1_p, subsetB_dta$zone.id)

lm.area2_p <- lm(m2.size_final~treat*party.type+p1.p2.monitoring+lights.mean+road.density+area.km2+b.pile.area_m+div, data = subsetB_dta) #Changed from _all in main file
summary(lm.area2_p)
lm.area2_pc <- lm.cluster(lm.area2_p, subsetB_dta$zone.id)

##Output Table
keepA <- c("treat", "party.typeINDEPENDENT", "party.typeOPPOSITION", "treat:party.typeINDEPENDENT", "treat:party.typeOPPOSITION")
keepB <- c("treat", "party.typeINDEPENDENT", "party.typeOPPOSITION", "b.pile.area_m", "treat:party.typeINDEPENDENT", "treat:party.typeOPPOSITION")
stargazer(lm.wpd1_p, lm.area1_p, lm.wpd2_p, lm.area2_p, type = "latex",
          dep.var.caption = "DV: Cleaned (0: Yes, 1: No) or Waste Pile Size ($m^{2}$)",
          dep.var.labels = c("M1 Cleaned","M1 Size","M2 Cleaned","M2 Size"),
          keep =c("treat","party.typeINDEPENDENT","party.typeOPPOSITION","b.pile.area_m","treat:party.typeINDEPENDENT","treat:party.typeOPPOSITION"),
          covariate.labels = c("Treatment","Indepedent","Opposition","Baseline Pile Area",
                               "Treatment X Indepedent","Treatment X Opposition"),
          add.lines = list(c("Covariates", "Yes", "Yes","Yes","Yes")),
          p = list(lm.wpd1_pc[[1]][keepA, "Pr(>|t|)"], lm.area1_pc[[1]][keepB, "Pr(>|t|)"], 
                   lm.wpd2_pc[[1]][keepA,"Pr(>|t|)"], lm.area2_pc[[1]][keepB,"Pr(>|t|)"]),
          se = list(lm.wpd1_pc[[1]][keepA, "Std. Error"], lm.area1_pc[[1]][keepB, "Std. Error"], 
                    lm.wpd2_pc[[1]][keepA,"Std. Error"], lm.area2_pc[[1]][keepB,"Std. Error"]),
          notes.label = "<em>Note: </em> two-tailed tests",
          df = FALSE,intercept.bottom = TRUE)

## Table J4: Subgroup Effects; Response Rates -----
rrate_m1 <- lm(m1.size_final~mean.rrate+b.pile.area_m+p1.p2.monitoring+lights.mean+road.density+div+area.km2+ls.pop_2016, data = hte_dta)
summary(rrate_m1)
rrate_m1c <- lm.cluster(rrate_m1, cluster=hte_dta$zone.id)

rrate_m2 <- lm(m2.size_final~mean.rrate+b.pile.area_m+p1.p2.monitoring+lights.mean+road.density+div+area.km2+ls.pop_2016, data = hte_dta)
summary(rrate_m2)
rrate_m2c <- lm.cluster(rrate_m2, cluster=hte_dta$zone.id)

rrate_m1.d <- lm(m1.waste.pile_d~mean.rrate+p1.p2.monitoring+lights.mean+road.density+div+area.km2+ls.pop_2016, data = hte_dta)
summary(rrate_m1.d)
rrate_m1.dc <- lm.cluster(rrate_m1.d, cluster=hte_dta$zone.id)

rrate_m2.d <- lm(m2.waste.pile_d~mean.rrate+p1.p2.monitoring+lights.mean+road.density+div+area.km2+ls.pop_2016, data = hte_dta)
summary(rrate_m2.d)
rrate_m2.dc <- lm.cluster(rrate_m2.d, cluster=hte_dta$zone.id)

stargazer(rrate_m1.d, rrate_m1, rrate_m2.d, rrate_m2,
          type="latex",
          title="Estimated Effects of Treatment Conditional on Zone-Level Response Rate",
          label="squeaky-wheel2",
          dep.var.caption = "DV: Cleaned (0: Yes, 1: No) or Waste Pile Size ($m^{2}$)",
          dep.var.labels = c("M1 Cleaned","M1 Size","M2 Cleaned","M2 Size"),
          covariate.labels = c("Response Rate","Baseline Pile Area","P1/P2 Monitoring"),
          keep = c("mean.rrate", "b.pile.area_m", "p1.p2.monitoring"),
          add.lines = list(c("Covariates", "Yes", "Yes","Yes","Yes")),
          p = list(rrate_m1.dc[[1]][2:4,4], rrate_m1c[[1]][2:4,4], rrate_m2.dc[[1]][2:4,4], rrate_m2c[[1]][2:4,4]),
          se = list(rrate_m1.dc[[1]][2:4,2], rrate_m1c[[1]][2:4,2], rrate_m2.dc[[1]][2:4,2], rrate_m2c[[1]][2:4,2]),
          notes.label = "Note: two-tailed tests",
          column.sep.width = "1pt",
          df = FALSE,intercept.bottom = TRUE)


## Table J5: Subgroup Effects; Reported Quality of Initial Waste Services -----
cons.service_m1 <- lm(m1.size_final~waste.service_modal+b.pile.area_m+p1.p2.monitoring+lights.mean+road.density+div+area.km2+ls.pop_2016, data = hte_dta)
summary(cons.service_m1)
cons.service_m1c <- lm.cluster(cons.service_m1, hte_dta$zone.id)

cons.service_m2  <- lm(m2.size_final~waste.service_modal+b.pile.area_m+p1.p2.monitoring+lights.mean+road.density+div+area.km2+ls.pop_2016, data = hte_dta)
summary(cons.service_m2)
cons.service_m2c <- lm.cluster(cons.service_m2, hte_dta$zone.id)

cons.service_m1.d <- lm(m1.waste.pile_d~waste.service_modal+p1.p2.monitoring+lights.mean+road.density+div+area.km2+ls.pop_2016, data = hte_dta)
summary(cons.service_m1.d)
cons.service_m1.dc <- lm.cluster(cons.service_m1.d, hte_dta$zone.id)

cons.service_m2.d <- lm(m2.waste.pile_d~waste.service_modal+p1.p2.monitoring+lights.mean+road.density+div+area.km2+ls.pop_2016, data = hte_dta)
summary(cons.service_m2.d)
cons.service_m2.dc <- lm.cluster(cons.service_m2.d, hte_dta$zone.id)

stargazer(cons.service_m1.d, cons.service_m1, cons.service_m2.d, cons.service_m2, type = "latex",
          title="Estimated Effects of Treatment Conditional on Baseline Quality of Service Provision",
          label="overall_table",
          dep.var.caption = "DV: Cleaned (0: Yes, 1: No) or Waste Pile Size ($m^{2}$)",
          dep.var.labels = c("M1 Cleaned","M1 Size","M2 Cleaned","M2 Size"),
          covariate.labels = c("Reported Service Quality","Baseline Pile Area","P1/P2 Monitoring"),
          keep = c("waste.service_modal", "b.pile.area_m", "p1.p2.monitoring"),
          add.lines = list(c("Covariates", "Yes", "Yes","Yes","Yes")),
          p = list(cons.service_m1.dc[[1]][2:4,4], cons.service_m1c[[1]][2:4,4], cons.service_m2.dc[[1]][2:4,4], cons.service_m2c[[1]][2:4,4]),
          se = list(cons.service_m1.dc[[1]][2:4,2], cons.service_m1c[[1]][2:4,2], cons.service_m2.dc[[1]][2:4,2], cons.service_m2c[[1]][2:4,2]),
          notes.label = "Note: two-tailed tests",
          column.sep.width = "1pt",
          df = FALSE,intercept.bottom = TRUE)


## Table J6: Subgroup Effects; Reported Zone-Level Dissatisfaction -----
cons.dis_m1 <- lm(m1.size_final~dissatisfaction_modal+b.pile.area_m+p1.p2.monitoring+lights.mean+road.density+div+area.km2+ls.pop_2016, data = hte_dta)
summary(cons.dis_m1)
cons.dis_m1c <- lm.cluster(cons.dis_m1, hte_dta$zone.id)

cons.dis_m2 <- lm(m2.size_final~dissatisfaction_modal+b.pile.area_m+p1.p2.monitoring+lights.mean+road.density+div+area.km2+ls.pop_2016, data = hte_dta)
summary(cons.dis_m2)
cons.dis_m2c <- lm.cluster(cons.dis_m2, hte_dta$zone.id)

cons.dis_m1.d <- lm(m1.waste.pile_d~dissatisfaction_modal+p1.p2.monitoring+lights.mean+road.density+div+area.km2+ls.pop_2016, data = hte_dta)
summary(cons.dis_m1.d)
cons.dis_m1.dc <- lm.cluster(cons.dis_m1.d, hte_dta$zone.id)

cons.dis_m2.d <- lm(m2.waste.pile_d~dissatisfaction_modal+p1.p2.monitoring+lights.mean+road.density+div+area.km2+ls.pop_2016, data = hte_dta)
summary(cons.dis_m2.d)
cons.dis_m2.dc <- lm.cluster(cons.dis_m2.d, hte_dta$zone.id)

stargazer(cons.dis_m1.d, cons.dis_m1, cons.dis_m2.d, cons.dis_m2, type = "latex",
          title="Estimated Effects of Treatment Conditional on Zone-Level Dissatisfaction at Baseline",
          label="dissatisfaction",
          dep.var.caption = "DV: Cleaned (0: Yes, 1: No) or Waste Pile Size ($m^{2}$)",
          dep.var.labels = c("M1 Cleaned","M1 Size","M2 Cleaned","M2 Size"),
          covariate.labels = c("Dissatisfaction","Baseline Pile Area","P1/P2 Monitoring"),
          keep = c("dissatisfaction_modal", "b.pile.area_m", "p1.p2.monitoring"),
          p = list(cons.dis_m1.dc[[1]][2:4,4], cons.dis_m1c[[1]][2:4,4], cons.dis_m2.dc[[1]][2:4,4], cons.dis_m2c[[1]][2:4,4]),
          se = list(cons.dis_m1.dc[[1]][2:4,2], cons.dis_m1c[[1]][2:4,2], cons.dis_m2.dc[[1]][2:4,2], cons.dis_m2c[[1]][2:4,2]),
          add.lines = list(c("Covariates", "Yes", "Yes","Yes","Yes")),
          notes.label = "Note: two-tailed tests",
          column.sep.width = "1pt",
          df = FALSE,intercept.bottom = TRUE)

## Table J7: Subgroup Effects; Reported Zone-Level Consistency -----
over.bad_m1 <- lm(m1.size_final~prop.deviant_overall+b.pile.area_m+p1.p2.monitoring+lights.mean+road.density+div+area.km2+ls.pop_2016, data = hte_dta)
summary(over.bad_m1)
over.bad_m1c <- lm.cluster(over.bad_m1, hte_dta$zone.id)

over.bad_m2 <- lm(m2.size_final~prop.deviant_overall+b.pile.area_m+p1.p2.monitoring+lights.mean+road.density+div+area.km2+ls.pop_2016, data = hte_dta[hte_dta$delt.b_m2pile.area_new<6000,])
summary(over.bad_m2)
over.bad_m2c <- lm.cluster(over.bad_m2, hte_dta$zone.id)

over.bad_m1.d <- lm(m1.waste.pile_d~prop.deviant_overall+p1.p2.monitoring+lights.mean+road.density+div+area.km2+ls.pop_2016, data = hte_dta)
summary(over.bad_m1.d)
over.bad_m1.dc <- lm.cluster(over.bad_m1.d, hte_dta$zone.id)

over.bad_m2.d <- lm(m2.waste.pile_d~prop.deviant_overall+p1.p2.monitoring+lights.mean+road.density+div+area.km2+ls.pop_2016, data = hte_dta)
summary(over.bad_m2.d)
over.bad_m2.dc <- lm.cluster(over.bad_m2.d, hte_dta$zone.id)

stargazer(over.bad_m1.d, over.bad_m1, over.bad_m2.d, over.bad_m2, type = "latex",
          title="Treatment Effect of Citizen Reporting Conditional on Consistency of Zone-Level Reports on Service Quality",
          label="overall_table",
          dep.var.caption = "DV: Cleaned (0: Yes, 1: No) or Waste Pile Size ($m^{2}$)",
          dep.var.labels = c("M1 Cleaned","M1 Size","M2 Cleaned","M2 Size"),
          covariate.labels = c("Inconsistency","Baseline Pile Area","P1/P2 Monitoring"),
          keep = c("prop.deviant_overall", "b.pile.area_m", "p1.p2.monitoring"),
          p = list(over.bad_m1.dc[[1]][2:4,4], over.bad_m1c[[1]][2:4,4], over.bad_m2.dc[[1]][2:4,4], over.bad_m2c[[1]][2:4,4]),
          se = list(over.bad_m1.dc[[1]][2:4,2], over.bad_m1c[[1]][2:4,2], over.bad_m2.dc[[1]][2:4,2], over.bad_m2c[[1]][2:4,2]),
          add.lines = list(c("Covariates", "Yes", "Yes","Yes","Yes")),
          notes.label = "Note: two-tailed tests",
          column.sep.width = "1pt",
          df = FALSE,intercept.bottom = TRUE)




## Tables J8 & J9: HTE; Distance to Division HQ -----
disthq_m1 <- lm(m1.size_final~treat*pile_divhq_distKM+b.pile.area_m+p1.p2.monitoring+lights.mean+road.density+div+area.km2+ls.pop_2016, data = subsetB_dta)
summary(disthq_m1)
disthq_m1c <- lm.cluster(disthq_m1, subsetB_dta$zone.id)

disthq_m2 <- lm(m2.size_final~treat*pile_divhq_distKM+b.pile.area_m+p1.p2.monitoring+lights.mean+road.density+div+area.km2+ls.pop_2016, data = subsetB_dta)
summary(disthq_m2)
disthq_m2c <- lm.cluster(disthq_m2, subsetB_dta$zone.id)

tthq_m1 <- lm(m1.size_final~treat*pile_divhq_tt+b.pile.area_m+p1.p2.monitoring+lights.mean+road.density+div+area.km2+ls.pop_2016, data = subsetB_dta)
summary(tthq_m1)
tthq_m1c <- lm.cluster(tthq_m1, subsetB_dta$zone.id)

tthq_m2 <- lm(m2.size_final~treat*pile_divhq_tt+b.pile.area_m+p1.p2.monitoring+lights.mean+road.density+div+area.km2+ls.pop_2016, data = subsetB_dta)
summary(tthq_m2)
tthq_m2c <- lm.cluster(tthq_m2, subsetB_dta$zone.id)

disthq_m1.d <- lm(m1.waste.pile_d~treat*pile_divhq_distKM+b.pile.area_m+p1.p2.monitoring+lights.mean+road.density+div+area.km2+ls.pop_2016, data = subsetB_dta)
summary(disthq_m1.d)
disthq_m1.dc <- lm.cluster(disthq_m1.d, subsetB_dta$zone.id)

disthq_m2.d <- lm(m2.waste.pile_d~treat*pile_divhq_distKM+b.pile.area_m+p1.p2.monitoring+lights.mean+road.density+div+area.km2+ls.pop_2016, data = subsetB_dta)
summary(disthq_m2.d)
disthq_m2.dc <- lm.cluster(disthq_m2.d, subsetB_dta$zone.id)

tthq_m1.d <- lm(m1.waste.pile_d~treat*pile_divhq_tt+b.pile.area_m+p1.p2.monitoring+lights.mean+road.density+div+area.km2+ls.pop_2016, data = subsetB_dta)
summary(tthq_m1.d)
tthq_m1.dc <- lm.cluster(tthq_m1.d, subsetB_dta$zone.id)

tthq_m2.d <- lm(m2.waste.pile_d~treat*pile_divhq_tt+b.pile.area_m+p1.p2.monitoring+lights.mean+road.density+div+area.km2+ls.pop_2016, data = subsetB_dta)
summary(tthq_m2.d)
tthq_m2.dc <- lm.cluster(tthq_m2.d, subsetB_dta$zone.id)

#Table I8
vars <- c("treat", "pile_divhq_distKM", "treat:pile_divhq_distKM", "b.pile.area_m", "p1.p2.monitoring")
stargazer(disthq_m1, disthq_m1.d, disthq_m2, disthq_m2.d, type = "latex",
          title="Treatment Effect of Citizen Reporting Conditional on Distance (km) to Nearest KCCA Division HQ",
          label="table:hte_distance",
          dep.var.caption = "DV: Cleaned (0: Yes, 1: No) or Waste Pile Size ($m^{2}$)",
          dep.var.labels = c("M1 Cleaned","M1 Size","M2 Cleaned","M2 Size"),
          covariate.labels = c("Treatment", "Distance", "Baseline Pile Area","P1/P2 Monitoring", "Treatment*Distance"),
          keep = vars,
          p = list(disthq_m1c[[1]][vars,"Pr(>|t|)"], disthq_m1.dc[[1]][vars,"Pr(>|t|)"], disthq_m2c[[1]][vars,"Pr(>|t|)"], disthq_m2.dc[[1]][vars,"Pr(>|t|)"]),
          se = list(disthq_m1c[[1]][vars,"Std. Error"], disthq_m1.dc[[1]][vars,"Std. Error"], disthq_m2c[[1]][vars,"Std. Error"], disthq_m2.dc[[1]][vars,"Std. Error"]),
          add.lines = list(c("Covariates", "Yes", "Yes","Yes","Yes")),
          notes.label = "Note: two-tailed tests",
          column.sep.width = "1pt",
          df = FALSE,intercept.bottom = TRUE)

#Table I9
vars <- c("treat", "pile_divhq_tt", "treat:pile_divhq_tt", "b.pile.area_m", "p1.p2.monitoring")
stargazer(tthq_m1, tthq_m1.d, tthq_m2, tthq_m2.d, type = "latex",
          title="Treatment Effect of Citizen Reporting Conditional on Travel Time (min.) to Nearest KCCA Division HQ",
          label="table:hte_traveltime",
          dep.var.caption = "DV: Cleaned (0: Yes, 1: No) or Waste Pile Size ($m^{2}$)",
          dep.var.labels = c("M1 Cleaned","M1 Size","M2 Cleaned","M2 Size"),
          covariate.labels = c("Treatment", "Travel Time", "Baseline Pile Area","P1/P2 Monitoring", "Treatment*Travel Time"),
          keep = vars,
          p = list(tthq_m1c[[1]][vars,"Pr(>|t|)"], tthq_m1.dc[[1]][vars,"Pr(>|t|)"], tthq_m2c[[1]][vars,"Pr(>|t|)"], tthq_m2.dc[[1]][vars,"Pr(>|t|)"]),
          se = list(tthq_m1c[[1]][vars,"Std. Error"], tthq_m1.dc[[1]][vars,"Std. Error"], tthq_m2c[[1]][vars,"Std. Error"], tthq_m2.dc[[1]][vars,"Std. Error"]),
          add.lines = list(c("Covariates", "Yes", "Yes","Yes","Yes")),
          notes.label = "Note: two-tailed tests",
          column.sep.width = "1pt",
          df = FALSE,intercept.bottom = TRUE)


## Table J10: Summary Statistics for Core Regressions -----
vars_d <- c("unique.id", "zone.id", "treat",
            "b.waste.pile_d", "m1.waste.pile_d", "m2.waste.pile_d",
            "p1.p2.monitoring", "lights.mean", "road.density", "area.km2", "ls.pop_2016", "div")
vars_s <- c("unique.id", "zone.id", "treat",
            "b.pile.area_m", "m1.size_final", "m2.size_final",
            "m1.ea.uwa1", "m1.ea.uwa2", "m2.ea.uwa1", "m2.ea.uwa2",
            "m1.ea.brn1", "m1.ea.brn2", "m2.ea.brn1", "m2.ea.brn2",
            "m1.ea.po1", "m1.ea.po2", "m2.ea.po1", "m2.ea.po2", 
            "p1.p2.monitoring", "lights.mean", "road.density", "area.km2", "ls.pop_2016", "div")
summ.df_d <- subsetA_dta[,names(subsetA_dta)%in%vars_d]
summ.df_s <- subsetB_dta[,names(subsetB_dta)%in%vars_s]

out_s <- c("b.pile.area_m", "m1.size_final", "m2.size_final",
           "m1.ea.uwa1", "m1.ea.uwa2", "m2.ea.uwa1", "m2.ea.uwa2",
           "m1.ea.brn1", "m1.ea.brn2", "m2.ea.brn1", "m2.ea.brn2",
           "m1.ea.po1", "m1.ea.po2", "m2.ea.po1", "m2.ea.po2")
out_d <- c("b.waste.pile_d", "m1.waste.pile_d", "m2.waste.pile_d")
cov_c <- c("lights.mean", "road.density", "area.km2", "ls.pop_2016")
cov_f <- c("p1.p2.monitoring", "div")

#Columns should be mean, min, max
outcome_size <- data.frame(Type=c("Outcome", rep("", length(out_s)-1)),
                           Mean=sapply(summ.df_s[,names(summ.df_s)%in%out_s], mean, na.rm=T),
                           Min=sapply(summ.df_s[,names(summ.df_s)%in%out_s], min, na.rm=T),
                           Max=sapply(summ.df_s[,names(summ.df_s)%in%out_s], max, na.rm=T),
                           Modal="-")

outcome_pile <- data.frame(Type=c("Outcome", rep("", length(out_d)-1)),
                           Mean="-",
                           Min="-",
                           Max="-",
                           Modal=sapply(summ.df_d[,names(summ.df_d)%in%out_d], modal, na.rm=T))

covariate_c <- data.frame(Type=c("Covariate", rep("", length(cov_c)-1)),
                          Mean=sapply(summ.df_s[,names(summ.df_s)%in%cov_c], mean, na.rm=T),
                          Min=sapply(summ.df_s[,names(summ.df_s)%in%cov_c], min, na.rm=T),
                          Max=sapply(summ.df_s[,names(summ.df_s)%in%cov_c], max, na.rm=T),
                          Modal="-")

covariate_f <- data.frame(Type=c("Covariate", rep("", length(cov_f)-1)),
                          Mean="-",
                          Min="-",
                          Max="-",
                          Modal=sapply(summ.df_s[,names(summ.df_s)%in%cov_f], modal, na.rm=T))

master_out <- rbind(outcome_size, outcome_pile, covariate_c, covariate_f)

master_out["m1.waste.pile_d", "Mean"] <- mean(as.numeric(summ.df_d$m1.waste.pile_d), na.rm=T)
master_out["m2.waste.pile_d", "Mean"] <- mean(as.numeric(summ.df_d$m2.waste.pile_d), na.rm=T)
master_out["p1.p2.monitoring", "Mean"] <- mean(as.numeric(summ.df_s$p1.p2.monitoring), na.rm=T)

master_out <- master_out[c("m1.size_final", "m2.size_final",
                           "m1.waste.pile_d", "m2.waste.pile_d",
                           #  "m1.ea.uwa1", "m1.ea.uwa2", "m2.ea.uwa1", "m2.ea.uwa2",
                           #  "m1.ea.brn1", "m1.ea.brn2", "m2.ea.brn1", "m2.ea.brn2",
                           # "m1.ea.po1", "m1.ea.po2", "m2.ea.po1", "m2.ea.po2",
                           "div", "p1.p2.monitoring", "b.pile.area_m",
                           "lights.mean", "road.density", "area.km2", "ls.pop_2016"),]
row.names(master_out) <- c("Pile Size, M1 $(m^2)$", "Pile Size, M2 $(m^2)$", 
                           "Pile Dummy, M1", "Pile Dummy, M2",
                           "Division", "P1/P2 Monitoring", "Pile Area, Baseline $(m^2)$", 
                           "Zone-NTL $(nW cm^{???2} sr^{???1})$", "Zone-Road Density", "Zone-Area $(km^2)$", "Zone-Population")

xtable(master_out)


## Table J11: RI, Diff-in-Diff Specification -----
did.dta <- data.frame(pile.id = rep(as.character(unique(subsetB_dta$unique.id)), each=3),
                      audit = rep(c("b", "m1", "m2"), length(unique(subsetB_dta$unique.id))),
                      post.trt = NA,
                      zone.id = NA,
                      treat = NA,
                      p1.p2.monitoring = NA,
                      pile.size = NA,
                      pile.d = NA, 
                      lights.mean = NA,
                      road.density = NA,
                      div = NA, 
                      area.km2 = NA,
                      ls.pop_2016 = NA)

id <- as.character(unique(did.dta$pile.id))  

for(i in 1:length(id)) {
  did.dta$pile.size[did.dta$audit=="b"&did.dta$pile.id==id[i]] <- subsetB_dta$b.pile.area_m[subsetB_dta$unique.id==id[i]]
  did.dta$pile.size[did.dta$audit=="m1"&did.dta$pile.id==id[i]] <- subsetB_dta$m1.size_final[subsetB_dta$unique.id==id[i]]
  did.dta$pile.size[did.dta$audit=="m2"&did.dta$pile.id==id[i]] <- subsetB_dta$m2.size_final[subsetB_dta$unique.id==id[i]]
  
  did.dta$pile.d[did.dta$audit=="b"&did.dta$pile.id==id[i]] <- subsetB_dta$b.waste.pile_d[subsetB_dta$unique.id==id[i]]
  did.dta$pile.d[did.dta$audit=="m1"&did.dta$pile.id==id[i]] <- subsetB_dta$m1.waste.pile_d[subsetB_dta$unique.id==id[i]]
  did.dta$pile.d[did.dta$audit=="m2"&did.dta$pile.id==id[i]] <- subsetB_dta$m2.waste.pile_d[subsetB_dta$unique.id==id[i]]
  
  did.dta$zone.id[did.dta$pile.id==id[i]] <- subsetB_dta$zone.id[subsetB_dta$unique.id==id[i]]
  did.dta$treat[did.dta$pile.id==id[i]] <- subsetB_dta$treat[subsetB_dta$unique.id==id[i]]
  did.dta$p1.p2.monitoring[did.dta$pile.id==id[i]] <- subsetB_dta$p1.p2.monitoring[subsetB_dta$unique.id==id[i]]
  did.dta$lights.mean[did.dta$pile.id==id[i]] <- subsetB_dta$lights.mean[subsetB_dta$unique.id==id[i]]
  did.dta$road.density[did.dta$pile.id==id[i]] <- subsetB_dta$road.density[subsetB_dta$unique.id==id[i]]
  did.dta$div[did.dta$pile.id==id[i]] <- subsetB_dta$div[subsetB_dta$unique.id==id[i]]
  did.dta$area.km2[did.dta$pile.id==id[i]] <- subsetB_dta$area.km2[subsetB_dta$unique.id==id[i]]
  did.dta$ls.pop_2016[did.dta$pile.id==id[i]] <- subsetB_dta$ls.pop_2016[subsetB_dta$unique.id==id[i]]
}

did.dta$post.trt <- ifelse(did.dta$audit=="b", "p0", "p1")
summary(did.dta)

did_size <- did.ri(formula = pile.size~treat+post.trt+treat*post.trt+p1.p2.monitoring+lights.mean+road.density+div+area.km2+ls.pop_2016,
                   dta = did.dta,
                   treat.var = "treat",
                   clust_var = did.dta$zone.id,
                   m=length(unique(did.dta$zone.id))/2,
                   sims=10000)


did_table <- data.frame(matrix(nrow=4, ncol=1))
rownames(did_table) <- c("Treatment*Post-Treatment","Standard Error", "p-value", "N")
colnames(did_table) <- c("Pile Size")

did_table[1,1] <- did_size$ate[[1]]
did_table[2,1] <- did_size$se[[1]]
did_table[3,1] <- did_size$p.one.way.lesser[[1]]
did_table[4,1] <- did_size$N[[1]]

out <- list(did_table)
attr(out, "message") <- c("Note: results calculated using cleaned waste pile size measurements.")
print(xtableList(out, caption="RI Results: Diff-in-Diff, Pile Size (Cleaned)"), caption.placement="top", digits=3)


## Table K1: Minimum Detectable Effect ----
ri.cph3.analysis <- subsetB_dta

set.seed(202)
#Waste Accumulation
ri.m1.size_new <- lm.ri(formula = m1.size_final~treat+p1.p2.monitoring+b.pile.area_m+lights.mean+road.density+div+area.km2+ls.pop_2016,
                        dta = ri.cph3.analysis,
                        treat.var = "treat",
                        clust_var = ri.cph3.analysis$zone.id,
                        m=length(unique(ri.cph3.analysis$zone.id))/2,
                        sims=10000) #This runs with NAs removed from the treat and cluster variables

ri.m2.size_new <- lm.ri(formula = m2.size_final~treat+p1.p2.monitoring+b.pile.area_m+lights.mean+road.density+div+area.km2+ls.pop_2016,
                        dta = ri.cph3.analysis,
                        treat.var = "treat",
                        clust_var = ri.cph3.analysis$zone.id,
                        m=length(unique(ri.cph3.analysis$zone.id))/2,
                        sims=10000)


#Pile Dummies
ri.cph3.analysis$m1.waste.pile_d2 <- ifelse(ri.cph3.analysis$m1.waste.pile_d2=="Yes", 1, ri.cph3.analysis$m1.waste.pile_d2)
ri.cph3.analysis$m1.waste.pile_d2 <- ifelse(ri.cph3.analysis$m1.waste.pile_d2=="No", 0, ri.cph3.analysis$m1.waste.pile_d2)
ri.wpd1_adj <- lm.ri(formula = m1.waste.pile_d2~treat+p1.p2.monitoring+lights.mean+road.density+div+area.km2+ls.pop_2016,
                     dta = ri.cph3.analysis,
                     treat.var = "treat",
                     clust_var = ri.cph3.analysis$zone.id,
                     m=length(unique(ri.cph3.analysis$zone.id))/2,
                     sims=10000)

ri.cph3.analysis$m2.waste.pile_d2 <- ifelse(ri.cph3.analysis$m2.waste.pile_d2=="Yes", 1, ri.cph3.analysis$m2.waste.pile_d2)
ri.cph3.analysis$m2.waste.pile_d2 <- ifelse(ri.cph3.analysis$m2.waste.pile_d2=="No", 0, ri.cph3.analysis$m2.waste.pile_d2)
ri.wpd2_adj <- lm.ri(formula = m2.waste.pile_d2~treat+p1.p2.monitoring+lights.mean+road.density+div+area.km2+ls.pop_2016,
                     dta = ri.cph3.analysis,
                     treat.var = "treat",
                     clust_var = ri.cph3.analysis$zone.id,
                     m=length(unique(ri.cph3.analysis$zone.id))/2,
                     sims=10000)

#Rank Test
ri.rank_m1_new <- lm.ri(formula=rank.m1_new~treat+p1.p2.monitoring+rank.b_new+lights.mean+road.density+area.km2+div+ls.pop_2016+rank.b,
                        dta=ri.cph3.analysis,
                        treat.var="treat",
                        clust_var = ri.cph3.analysis$zone.id, 
                        m=length(unique(ri.cph3.analysis$zone.id))/2,
                        sims=10000)

ri.rank_m2_new <- lm.ri(formula=rank.m2_new~treat+p1.p2.monitoring+rank.b_new+lights.mean+road.density+area.km2+div+ls.pop_2016+rank.b,
                        dta=ri.cph3.analysis,
                        treat.var="treat",
                        clust_var = ri.cph3.analysis$zone.id, 
                        m=length(unique(ri.cph3.analysis$zone.id))/2,
                        sims=10000)


te.seq <- c(seq(from=-1, to=-4, by=-0.1),seq(from=-8, to=-10, by=-0.1))
power.store.size <- rep(NA, length(te.seq))
power.store.dummy <- rep(NA, length(te.seq))
power.store.rank <- rep(NA, length(te.seq))


for (i in 1:length(te.seq)){
  
  ri.reverse.m1 <- lm.ri.power.all3(formula.size = m1.size_final~treat+p1.p2.monitoring+b.pile.area_m+lights.mean+road.density+div+area.km2+ls.pop_2016,
                                    formula.dummy = m1.waste.pile_d2~treat+p1.p2.monitoring+lights.mean+road.density+div+area.km2+ls.pop_2016,
                                    formula.rank = rank.m1_new~treat+p1.p2.monitoring+rank.b_new+lights.mean+road.density+area.km2+div+ls.pop_2016+rank.b,
                                    size.bar = quantile(sort(ri.m1.size_new$ate.samp.dist), probs = 0.05),
                                    dummy.bar = quantile(sort(ri.wpd1_adj$ate.samp.dist), probs = 0.05),
                                    rank.bar = quantile(sort(ri.rank_m1_new$ate.samp.dist), probs = 0.05),
                                    dta = ri.cph3.analysis,
                                    treat.var = "treat",
                                    outcome.var = "m1.size_final",
                                    binary.var = "m1.waste.pile_d2",
                                    rank.var = "rank.m1_new",
                                    sims = 2000,
                                    clust_var = ri.cph3.analysis$zone.id,
                                    m=length(unique(ri.cph3.analysis$zone.id))/2,
                                    te = te.seq[i]
  )
  
  power.store.size[i] <- ri.reverse.m1$power.size
  power.store.dummy[i] <- ri.reverse.m1$power.dummy
  power.store.rank[i] <- ri.reverse.m1$power.rank
}


te.seq2 <- c(seq(from=-2, to=-5, by=-0.1),seq(from=-8, to=-10, by=-0.1))

power.store.size2 <- rep(NA, length(te.seq2))
power.store.dummy2 <- rep(NA, length(te.seq2))
power.store.rank2 <- rep(NA, length(te.seq2))


for (i in 1:length(te.seq2)){
  
  ri.reverse.m2 <- lm.ri.power.all3(formula.size = m2.size_final~treat+p1.p2.monitoring+b.pile.area_m+lights.mean+road.density+div+area.km2+ls.pop_2016,
                                    formula.dummy = m2.waste.pile_d2~treat+p1.p2.monitoring+lights.mean+road.density+div+area.km2+ls.pop_2016,
                                    formula.rank = rank.m2_new~treat+p1.p2.monitoring+rank.b_new+lights.mean+road.density+area.km2+div+ls.pop_2016+rank.b,
                                    size.bar = quantile(sort(ri.m2.size_new$ate.samp.dist), probs = 0.05),
                                    dummy.bar = quantile(sort(ri.wpd2_adj$ate.samp.dist), probs = 0.05),
                                    rank.bar = quantile(sort(ri.rank_m2_new$ate.samp.dist), probs = 0.05),
                                    dta = ri.cph3.analysis,
                                    treat.var = "treat",
                                    outcome.var = "m2.size_final",
                                    binary.var = "m2.waste.pile_d2",
                                    rank.var = "rank.m2_new",
                                    sims = 2000,
                                    clust_var = ri.cph3.analysis$zone.id,
                                    m=length(unique(ri.cph3.analysis$zone.id))/2,
                                    te = te.seq2[i]
  )
  
  power.store.size2[i] <- ri.reverse.m2$power.size
  power.store.dummy2[i] <- ri.reverse.m2$power.dummy
  power.store.rank2[i] <- ri.reverse.m2$power.rank
}

#Plots
plot(te.seq,power.store.size)
plot(te.seq,power.store.dummy)
plot(te.seq,power.store.rank)
plot(te.seq2,power.store.size2)
plot(te.seq2,power.store.dummy2)
plot(te.seq2,power.store.rank2)

#Table
mid1 <- c(max(te.seq[power.store.size>0.8]),
          max(te.seq[power.store.dummy>0.8]),
          max(te.seq[power.store.rank>0.8])
)

mid1.sd.c <- sd(ri.cph3.analysis$m1.size_final[ri.cph3.analysis$treat==0])

mid1.ses <- -round(mid1/mid1.sd.c,2)

mid2 <- c("--", #max(te.seq[power.store.size2>0.8]),
          max(te.seq[power.store.dummy2>0.8]),
          max(te.seq[power.store.rank2>0.8])
)

mid2.sd.c <- sd(ri.cph3.analysis$m2.size_final[ri.cph3.analysis$treat==0])

mid2.ses <- c("--",-round(as.numeric(mid2[2:3])/mid2.sd.c,2))

#Table
tab.dta <- data.matrix(rbind(mid1,mid1.ses,mid2,mid2.ses))
rownames(tab.dta) <- c("Midline 1","", "Midline 2","")
colnames(tab.dta) <- c("Pile Size","Pile Cleaned","Pile Rank")
xtable(tab.dta, summary=FALSE,
       label = "SI-tab-MDE",
       caption = "Minimum Detectable Effects $(m^2)$", 
       align = "lccc") # Tue Dec  4 08:55:21 2018





